Spaces:

agarwalanu3103
/

clarify-rl

Running

Anurag Agarwal

ClarifyRL: initial HF Space deploy

2414d31 18 days ago

5.56 kB

	"""
	Rule-based user simulator for ClarifyRL.

	Given a free-text clarifying question + the hidden profile + the task family,
	return a short natural-language answer and the profile field that was revealed
	(or None if the question didn't match any field the user knows).

	Pure-Python, deterministic, sub-millisecond. No LLM call.
	"""

	from __future__ import annotations

	from typing import Any, Optional


	FIELD_KEYWORDS: dict[str, list[str]] = {
	"stack": ["stack", "language", "framework", "tech", "what to build it in"],
	"scale": ["scale", "users", "traffic", "load", "rps", "concurrent"],
	"auth": ["auth", "authentication", "login", "sso", "jwt", "oauth"],
	"datastore": ["database", "db", "storage", "persist", "data store"],
	"deployment_target": ["deploy", "host", "hosting", "cloud", "aws", "kubernetes", "where to run"],
	"language_version": ["version", "python version", "node version", "runtime version"],
	"test_coverage": ["test", "coverage", "testing", "qa"],

	"primary_symptom": ["symptom", "what's wrong", "what hurts", "how do you feel", "what is the issue"],
	"duration": ["how long", "since when", "duration", "when did", "started"],
	"severity": ["severe", "mild", "intense", "how bad", "severity", "scale of pain"],
	"age_band": ["age", "how old", "young", "elderly", "child", "adult"],
	"prior_conditions": ["history", "prior condition", "medical history", "pre-existing", "chronic"],
	"medications": ["medication", "meds", "drugs", "prescription", "taking anything"],

	"order_id": ["order id", "order number", "order #", "reference", "tracking", "which order"],
	"item_issue": ["what's wrong with", "what happened", "damaged", "missing", "wrong", "issue with the order", "problem with"],
	"refund_or_replace": ["refund", "replace", "return", "credit", "what would you like", "resolution"],
	"urgency": ["when do you need", "need by", "urgent", "asap", "how soon", "urgency"],
	"channel_preferred": ["contact", "reach you", "email or phone", "how should we", "channel"],

	"participants": ["who", "participants", "attend", "join", "invite", "attendees"],
	"date": ["what day", "which day", "date", "when (day)", "what date"],
	"time": ["what time", "which time", "hour", "morning or afternoon"],
	"duration_minutes": ["how long", "duration", "minutes", "length"],
	"platform": ["zoom", "platform", "in person", "in-person", "where (online)", "virtual or"],

	"event_type": ["what kind of event", "kind", "type of event", "occasion"],
	"guest_count": ["how many", "guest", "headcount", "size", "people"],
	"venue": ["where", "venue", "location", "place"],
	"budget_band": ["budget", "cost", "spend", "price", "how much"],
	"theme": ["theme", "vibe", "style", "formal or casual"],
	"dietary_constraints": ["diet", "vegetarian", "vegan", "food restriction", "allergies", "dietary"],
	}


	# Reply templates, one per profile field. format_answer fills the single
	# ``{value}`` placeholder via str.format; a field with no entry here is
	# rendered as the bare value.
	_FIELD_PHRASING: dict[str, str] = {
	    # Software project fields
	    "stack": "I'd like to use {value}",
	    "scale": "Expecting around {value}",
	    "auth": "Auth should be {value}",
	    "datastore": "Use {value}",
	    "deployment_target": "Deploy to {value}",
	    "language_version": "Use {value}",
	    "test_coverage": "{value} tests",

	    # Medical fields
	    "primary_symptom": "It's a {value}",
	    "duration": "About {value}",
	    "severity": "I'd say {value}",
	    "age_band": "I'm a {value}",
	    "prior_conditions": "{value}",
	    "medications": "{value}",

	    # Order / support fields
	    "order_id": "Order {value}",
	    "item_issue": "{value}",
	    "refund_or_replace": "I'd prefer a {value}",
	    "urgency": "Urgency is {value}",
	    "channel_preferred": "Please reach me by {value}",

	    # Meeting fields
	    "participants": "{value}",
	    "date": "{value}",
	    "time": "{value}",
	    "duration_minutes": "{value} minutes",
	    "platform": "{value}",

	    # Event fields
	    "event_type": "A {value}",
	    "guest_count": "About {value} people",
	    "venue": "At a {value}",
	    "budget_band": "Budget around {value}",
	    "theme": "{value}",
	    "dietary_constraints": "{value}",
	}


	# Canned replies, keyed by task family, returned by answer() when the
	# question matches no field in the hidden profile. A family missing from
	# this table gets the generic "I don't know." fallback in answer().
	_NO_MATCH_REPLIES: dict[str, str] = {
	    "coding_requirements": (
	        "I don't have a strong preference on that — pick something reasonable."
	    ),
	    "medical_intake": "I'm not sure about that, sorry.",
	    "support_triage": "I don't really know — does it matter?",
	    "meeting_scheduling": "No preference, you choose.",
	    "event_planning": "Up to you on that one.",
	}


	def _normalize(text: str) -> str:
	    """Lowercase *text* and collapse every whitespace run to one space.

	    Leading and trailing whitespace is dropped, so an all-whitespace or
	    empty input yields the empty string.
	    """
	    lowered = text.lower()
	    words = lowered.split()  # no-arg split discards empty pieces
	    return " ".join(words)


	def _keyword_starts_word(text: str, keyword: str) -> bool:
	    """Return True if *keyword* occurs in *text* beginning at a word start.

	    A word start is position 0 or any position whose preceding character is
	    not alphanumeric (space, punctuation, hyphen, ...). Only the left edge
	    is checked, so a keyword still matches longer inflected forms.
	    """
	    pos = text.find(keyword)
	    while pos != -1:
	        if pos == 0 or not text[pos - 1].isalnum():
	            return True
	        # This occurrence sits inside a longer word; look for a later one.
	        pos = text.find(keyword, pos + 1)
	    return False


	def match_field(question: str, allowed_keys: list[str]) -> Optional[str]:
	    """Pick the profile field that *question* is most likely asking about.

	    The question is normalized (lowercased, whitespace collapsed) and every
	    keyword registered in FIELD_KEYWORDS for each key in *allowed_keys* is
	    looked up in it. The field owning the longest matching keyword wins; on
	    a tie the first one encountered (in *allowed_keys* order, then keyword
	    order) is kept. Keys with no FIELD_KEYWORDS entry are ignored.

	    A keyword only counts when it begins at a word start. A plain substring
	    test would fire inside unrelated words ("age" in "language" or
	    "dosage", "date" in "update", "sso" in "lesson") and reveal a field the
	    question never asked about. Matching on the left edge alone keeps
	    inflected forms working ("deploy" still matches "deployment").

	    Args:
	        question: Free-text clarifying question.
	        allowed_keys: Field names eligible to be matched.

	    Returns:
	        The matched field name, or None when no keyword matches.
	    """
	    q = _normalize(question)
	    best_score = -1
	    best_field: Optional[str] = None
	    for field_key in allowed_keys:
	        for kw in FIELD_KEYWORDS.get(field_key, ()):
	            # Length test first: a keyword that cannot beat the current best
	            # does not need to be searched for at all.
	            if len(kw) > best_score and _keyword_starts_word(q, kw):
	                best_score = len(kw)
	                best_field = field_key
	    return best_field


	def format_answer(field_key: str, value: Any, family: str) -> str:
	    """Render a revealed profile value as a short sentence.

	    The template registered for *field_key* in _FIELD_PHRASING is filled
	    with *value* (an unregistered field falls back to the bare value), the
	    result is stripped, and a period is appended unless the text already
	    ends in ".", "!" or "?".

	    Args:
	        field_key: Name of the profile field being answered.
	        value: The field's value; inserted via str.format.
	        family: Accepted but not used by this function.

	    Returns:
	        The punctuated answer text.
	    """
	    del family  # intentionally unused
	    template = _FIELD_PHRASING.get(field_key, "{value}")
	    sentence = template.format(value=value).strip()
	    if sentence.endswith((".", "!", "?")):
	        return sentence
	    return sentence + "."


	def answer(
	    question: str,
	    hidden_profile: dict[str, Any],
	    family: str,
	) -> tuple[str, Optional[str]]:
	    """Answer a clarifying question from the simulated user's hidden profile.

	    Only fields present in *hidden_profile* are eligible to be matched.

	    Args:
	        question: Free-text clarifying question.
	        hidden_profile: Mapping of profile field name to its value.
	        family: Task family; selects the reply used when nothing matches.

	    Returns:
	        A ``(reply, field)`` pair. On a match, ``reply`` is the formatted
	        value and ``field`` is the matched field name. Otherwise ``reply``
	        is the family's canned no-match reply ("I don't know." for a family
	        without one) and ``field`` is None.
	    """
	    candidate_fields = list(hidden_profile.keys())
	    revealed = match_field(question, candidate_fields)
	    if revealed is not None:
	        reply = format_answer(revealed, hidden_profile[revealed], family)
	        return reply, revealed
	    fallback = _NO_MATCH_REPLIES.get(family, "I don't know.")
	    return fallback, None


	# Public API of this module; underscore-prefixed tables and helpers are
	# deliberately left out.
	__all__ = ["FIELD_KEYWORDS", "match_field", "format_answer", "answer"]