# clarify-rl/server/user_simulator.py
# Anurag Agarwal
# ClarifyRL: initial HF Space deploy
# 2414d31
"""
Rule-based user simulator for ClarifyRL.
Given a free-text clarifying question + the hidden profile + the task family,
return a short natural-language answer and the profile field that was revealed
(or None if the question didn't match any field the user knows).
Pure-Python, deterministic, sub-millisecond. No LLM call.
"""
from __future__ import annotations
from typing import Any, Optional
# Maps each profile field name to the keywords/phrases that signal a question
# is asking about that field. match_field() tests every keyword with a plain
# substring check against the lowercased, whitespace-collapsed question, so:
#   * entries must themselves be lowercase and single-spaced to ever match;
#   * short keywords also hit inside longer words (e.g. "age" in "message");
#   * when several keywords hit, the longest keyword decides the field.
# NOTE(review): the groups marked below look like one group per task family;
# this is inferred from the field names and the keys of _NO_MATCH_REPLIES —
# confirm against the profile definitions.
FIELD_KEYWORDS: dict[str, list[str]] = {
# -- presumably coding_requirements fields --
"stack": ["stack", "language", "framework", "tech", "what to build it in"],
"scale": ["scale", "users", "traffic", "load", "rps", "concurrent"],
"auth": ["auth", "authentication", "login", "sso", "jwt", "oauth"],
"datastore": ["database", "db", "storage", "persist", "data store"],
"deployment_target": ["deploy", "host", "hosting", "cloud", "aws", "kubernetes", "where to run"],
"language_version": ["version", "python version", "node version", "runtime version"],
"test_coverage": ["test", "coverage", "testing", "qa"],
# -- presumably medical_intake fields --
"primary_symptom": ["symptom", "what's wrong", "what hurts", "how do you feel", "what is the issue"],
"duration": ["how long", "since when", "duration", "when did", "started"],
"severity": ["severe", "mild", "intense", "how bad", "severity", "scale of pain"],
"age_band": ["age", "how old", "young", "elderly", "child", "adult"],
"prior_conditions": ["history", "prior condition", "medical history", "pre-existing", "chronic"],
"medications": ["medication", "meds", "drugs", "prescription", "taking anything"],
# -- presumably support_triage fields --
"order_id": ["order id", "order number", "order #", "reference", "tracking", "which order"],
"item_issue": ["what's wrong with", "what happened", "damaged", "missing", "wrong", "issue with the order", "problem with"],
"refund_or_replace": ["refund", "replace", "return", "credit", "what would you like", "resolution"],
"urgency": ["when do you need", "need by", "urgent", "asap", "how soon", "urgency"],
"channel_preferred": ["contact", "reach you", "email or phone", "how should we", "channel"],
# -- presumably meeting_scheduling fields --
"participants": ["who", "participants", "attend", "join", "invite", "attendees"],
# "when (day)" contains literal parentheses, so it only matches a question
# that spells the parentheses out too.
"date": ["what day", "which day", "date", "when (day)", "what date"],
"time": ["what time", "which time", "hour", "morning or afternoon"],
# Shares "how long" and "duration" with the "duration" field above; on an
# equal-length hit match_field() keeps whichever field comes first in the
# allowed_keys it was given.
"duration_minutes": ["how long", "duration", "minutes", "length"],
# "where (online)" likewise only matches with its literal parentheses.
"platform": ["zoom", "platform", "in person", "in-person", "where (online)", "virtual or"],
# -- presumably event_planning fields --
"event_type": ["what kind of event", "kind", "type of event", "occasion"],
"guest_count": ["how many", "guest", "headcount", "size", "people"],
"venue": ["where", "venue", "location", "place"],
"budget_band": ["budget", "cost", "spend", "price", "how much"],
"theme": ["theme", "vibe", "style", "formal or casual"],
"dietary_constraints": ["diet", "vegetarian", "vegan", "food restriction", "allergies", "dietary"],
}
# Answer template per profile field. format_answer() fills the single
# "{value}" placeholder via str.format, strips surrounding whitespace and
# appends a "." unless the text already ends in ".", "!" or "?".
# A field with no entry here falls back to the bare "{value}" template.
_FIELD_PHRASING: dict[str, str] = {
"stack": "I'd like to use {value}",
"scale": "Expecting around {value}",
"auth": "Auth should be {value}",
"datastore": "Use {value}",
"deployment_target": "Deploy to {value}",
"language_version": "Use {value}",
"test_coverage": "{value} tests",
"primary_symptom": "It's a {value}",
"duration": "About {value}",
"severity": "I'd say {value}",
"age_band": "I'm a {value}",
"prior_conditions": "{value}",
"medications": "{value}",
"order_id": "Order {value}",
"item_issue": "{value}",
"refund_or_replace": "I'd prefer a {value}",
"urgency": "Urgency is {value}",
"channel_preferred": "Please reach me by {value}",
"participants": "{value}",
"date": "{value}",
"time": "{value}",
"duration_minutes": "{value} minutes",
"platform": "{value}",
"event_type": "A {value}",
"guest_count": "About {value} people",
"venue": "At a {value}",
"budget_band": "Budget around {value}",
"theme": "{value}",
"dietary_constraints": "{value}",
}
# Reply returned by answer() when the question matched no field of the hidden
# profile, keyed by task family. A family missing from this table gets the
# generic "I don't know." fallback hard-coded in answer().
_NO_MATCH_REPLIES: dict[str, str] = {
"coding_requirements": "I don't have a strong preference on that — pick something reasonable.",
"medical_intake": "I'm not sure about that, sorry.",
"support_triage": "I don't really know — does it matter?",
"meeting_scheduling": "No preference, you choose.",
"event_planning": "Up to you on that one.",
}
def _normalize(text: str) -> str:
    """Return *text* lowercased with each whitespace run collapsed to one space.

    Leading and trailing whitespace disappears as well, because the text is
    split on whitespace and re-joined with single spaces.
    """
    words = text.lower().split()
    return " ".join(words)
def match_field(question: str, allowed_keys: list[str]) -> Optional[str]:
    """Pick the profile field that *question* is most likely asking about.

    The question is normalized (lowercased, whitespace collapsed) and every
    keyword registered in ``FIELD_KEYWORDS`` for each allowed field is tested
    with a plain substring check. The field owning the longest matching
    keyword wins.

    Args:
        question: Free-text clarifying question.
        allowed_keys: Field names that may be returned; names without an
            entry in ``FIELD_KEYWORDS`` simply contribute no keywords.

    Returns:
        The winning field name, or ``None`` when no keyword occurs in the
        question.
    """
    normalized = _normalize(question)
    # Collect (keyword length, field) for every hit, in allowed_keys order.
    hits = [
        (len(keyword), name)
        for name in allowed_keys
        for keyword in FIELD_KEYWORDS.get(name, ())
        if keyword in normalized
    ]
    if not hits:
        return None
    # Compare on length only: max() returns the first of several equally long
    # hits, so ties go to the field listed earliest in allowed_keys.
    _, winner = max(hits, key=lambda hit: hit[0])
    return winner
def format_answer(field_key: str, value: Any, family: str) -> str:
    """Render the revealed *value* of *field_key* as a short sentence.

    Args:
        field_key: Profile field being answered; selects the template from
            ``_FIELD_PHRASING`` (a bare ``"{value}"`` when there is none).
        value: The profile value, interpolated with ``str.format``.
        family: Task family; accepted but not used by this function.

    Returns:
        The stripped sentence, with a trailing "." added unless it already
        ends in ".", "!" or "?".
    """
    del family  # never read here
    template = _FIELD_PHRASING.get(field_key, "{value}")
    sentence = template.format(value=value).strip()
    if sentence.endswith((".", "!", "?")):
        return sentence
    return sentence + "."
def answer(
    question: str,
    hidden_profile: dict[str, Any],
    family: str,
) -> tuple[str, Optional[str]]:
    """Answer a clarifying question on behalf of the simulated user.

    Args:
        question: Free-text clarifying question.
        hidden_profile: Field name -> value mapping; only its keys are
            candidates for matching.
        family: Task family, used to choose the reply when nothing matches.

    Returns:
        ``(reply, revealed_field)``. ``revealed_field`` is the matched profile
        key, or ``None`` when the question matched no field, in which case
        ``reply`` is the family's no-match reply ("I don't know." for a
        family without one).
    """
    revealed = match_field(question, list(hidden_profile))
    if revealed is not None:
        reply = format_answer(revealed, hidden_profile[revealed], family)
        return reply, revealed
    fallback = _NO_MATCH_REPLIES.get(family, "I don't know.")
    return fallback, None
# Names exported by `from <module> import *`. The underscore-prefixed tables
# (_FIELD_PHRASING, _NO_MATCH_REPLIES) and _normalize are not exported.
__all__ = [
"FIELD_KEYWORDS",
"match_field",
"format_answer",
"answer",
]