"""
Rule-based user simulator for ClarifyRL.
Given a free-text clarifying question + the hidden profile + the task family,
return a short natural-language answer and the profile field that was revealed
(or None if the question didn't match any field the user knows).
Pure-Python, deterministic, sub-millisecond. No LLM call.
"""
from __future__ import annotations
from typing import Any, Optional
# Keyword table consulted by match_field(): each profile field key maps to the
# phrases that signal a question about that field. Entries are written in
# lowercase because the question is lowercased before it is compared. When
# several keywords hit, match_field() keeps the field with the longest one.
# NOTE(review): the group headings below are inferred from the field names and
# the family names in _NO_MATCH_REPLIES; confirm against the task definitions.
FIELD_KEYWORDS: dict[str, list[str]] = {
# Presumably the coding_requirements family.
"stack": ["stack", "language", "framework", "tech", "what to build it in"],
"scale": ["scale", "users", "traffic", "load", "rps", "concurrent"],
"auth": ["auth", "authentication", "login", "sso", "jwt", "oauth"],
"datastore": ["database", "db", "storage", "persist", "data store"],
"deployment_target": ["deploy", "host", "hosting", "cloud", "aws", "kubernetes", "where to run"],
"language_version": ["version", "python version", "node version", "runtime version"],
"test_coverage": ["test", "coverage", "testing", "qa"],
# Presumably the medical_intake family.
"primary_symptom": ["symptom", "what's wrong", "what hurts", "how do you feel", "what is the issue"],
"duration": ["how long", "since when", "duration", "when did", "started"],
"severity": ["severe", "mild", "intense", "how bad", "severity", "scale of pain"],
"age_band": ["age", "how old", "young", "elderly", "child", "adult"],
"prior_conditions": ["history", "prior condition", "medical history", "pre-existing", "chronic"],
"medications": ["medication", "meds", "drugs", "prescription", "taking anything"],
# Presumably the support_triage family.
"order_id": ["order id", "order number", "order #", "reference", "tracking", "which order"],
"item_issue": ["what's wrong with", "what happened", "damaged", "missing", "wrong", "issue with the order", "problem with"],
"refund_or_replace": ["refund", "replace", "return", "credit", "what would you like", "resolution"],
"urgency": ["when do you need", "need by", "urgent", "asap", "how soon", "urgency"],
"channel_preferred": ["contact", "reach you", "email or phone", "how should we", "channel"],
# Presumably the meeting_scheduling family.
"participants": ["who", "participants", "attend", "join", "invite", "attendees"],
# NOTE(review): "when (day)" is compared literally, parentheses included;
# confirm that this is the intended keyword.
"date": ["what day", "which day", "date", "when (day)", "what date"],
"time": ["what time", "which time", "hour", "morning or afternoon"],
# Shares "how long" and "duration" with the "duration" field above.
"duration_minutes": ["how long", "duration", "minutes", "length"],
# NOTE(review): "where (online)" is likewise compared literally; confirm.
"platform": ["zoom", "platform", "in person", "in-person", "where (online)", "virtual or"],
# Presumably the event_planning family.
"event_type": ["what kind of event", "kind", "type of event", "occasion"],
"guest_count": ["how many", "guest", "headcount", "size", "people"],
"venue": ["where", "venue", "location", "place"],
"budget_band": ["budget", "cost", "spend", "price", "how much"],
"theme": ["theme", "vibe", "style", "formal or casual"],
"dietary_constraints": ["diet", "vegetarian", "vegan", "food restriction", "allergies", "dietary"],
}
# Answer templates used by format_answer(): "{value}" is replaced with the
# profile value via str.format(). A field with no entry here falls back to the
# bare "{value}" template. format_answer() appends a final period when the
# rendered text does not already end in ".", "!" or "?", which is why the
# templates carry no terminal punctuation themselves.
_FIELD_PHRASING: dict[str, str] = {
"stack": "I'd like to use {value}",
"scale": "Expecting around {value}",
"auth": "Auth should be {value}",
"datastore": "Use {value}",
"deployment_target": "Deploy to {value}",
"language_version": "Use {value}",
"test_coverage": "{value} tests",
"primary_symptom": "It's a {value}",
"duration": "About {value}",
"severity": "I'd say {value}",
"age_band": "I'm a {value}",
"prior_conditions": "{value}",
"medications": "{value}",
"order_id": "Order {value}",
"item_issue": "{value}",
"refund_or_replace": "I'd prefer a {value}",
"urgency": "Urgency is {value}",
"channel_preferred": "Please reach me by {value}",
"participants": "{value}",
"date": "{value}",
"time": "{value}",
"duration_minutes": "{value} minutes",
"platform": "{value}",
"event_type": "A {value}",
"guest_count": "About {value} people",
"venue": "At a {value}",
"budget_band": "Budget around {value}",
"theme": "{value}",
"dietary_constraints": "{value}",
}
# Replies returned by answer() when the question matches no profile field,
# keyed by task family. A family that is not listed here gets the generic
# "I don't know." reply instead.
_NO_MATCH_REPLIES: dict[str, str] = {
"coding_requirements": "I don't have a strong preference on that — pick something reasonable.",
"medical_intake": "I'm not sure about that, sorry.",
"support_triage": "I don't really know — does it matter?",
"meeting_scheduling": "No preference, you choose.",
"event_planning": "Up to you on that one.",
}
def _normalize(text: str) -> str:
    """Lowercase *text* and collapse every whitespace run to one space.

    Leading and trailing whitespace is dropped as a side effect of splitting.
    """
    lowered = text.lower()
    words = lowered.split()
    return " ".join(words)
def _has_word_start_match(text: str, keyword: str) -> bool:
    """Return True if *keyword* occurs in *text* beginning at a word start."""
    # Keywords that do not begin with a letter or digit (including the empty
    # string) have no meaningful word start, so keep plain containment.
    if not keyword[:1].isalnum():
        return keyword in text
    pos = text.find(keyword)
    while pos != -1:
        # Accept a hit only at the very start of the text or directly after a
        # non-alphanumeric character; this stops "date" firing inside
        # "update" while still letting "test" match "tests".
        if pos == 0 or not text[pos - 1].isalnum():
            return True
        pos = text.find(keyword, pos + 1)
    return False


def match_field(question: str, allowed_keys: list[str]) -> Optional[str]:
    """Pick the profile field that a clarifying question is asking about.

    The question is normalized (lowercased, whitespace collapsed) and every
    keyword of every allowed field is looked for in it. A keyword counts only
    when it begins at a word start, so short keywords such as "age", "date"
    or "venue" no longer match inside unrelated words ("message", "update",
    "revenue"). Suffixes are still allowed, which keeps plurals and
    inflections ("tests", "deployed") matching.

    Args:
        question: Free-text question.
        allowed_keys: Field keys that may be returned; keys without an entry
            in FIELD_KEYWORDS are ignored.

    Returns:
        The field owning the longest matching keyword, or None when nothing
        matches. On equal keyword length the field that comes first in
        *allowed_keys* wins.
    """
    q = _normalize(question)
    best_score = -1
    best_field: Optional[str] = None
    for field_key in allowed_keys:
        for kw in FIELD_KEYWORDS.get(field_key, ()):
            # Check the cheap length test first; the strict ">" keeps the
            # earliest field on ties.
            if len(kw) > best_score and _has_word_start_match(q, kw):
                best_score = len(kw)
                best_field = field_key
    return best_field
def format_answer(field_key: str, value: Any, family: str) -> str:
    """Render the simulated user's reply for one revealed profile field.

    Args:
        field_key: Profile field being revealed; selects the template.
        value: Profile value substituted for ``{value}`` in the template.
        family: Task family. Accepted for interface symmetry but not used.

    Returns:
        The rendered sentence, stripped of surrounding whitespace and
        guaranteed to end in ".", "!" or "?".
    """
    del family  # deliberately unused
    if field_key in _FIELD_PHRASING:
        template = _FIELD_PHRASING[field_key]
    else:
        # Unknown fields are answered with the bare value.
        template = "{value}"
    rendered = template.format(value=value).strip()
    if rendered.endswith((".", "!", "?")):
        return rendered
    return rendered + "."
def answer(
    question: str,
    hidden_profile: dict[str, Any],
    family: str,
) -> tuple[str, Optional[str]]:
    """Answer a clarifying question from the hidden user profile.

    Args:
        question: Free-text clarifying question.
        hidden_profile: Mapping of profile field key to its value; only these
            keys are candidates for matching.
        family: Task family, used to choose the fallback reply.

    Returns:
        A ``(reply, revealed_field)`` pair. ``revealed_field`` is the matched
        profile key, or None when no field matched; in that case ``reply`` is
        the family's fallback text, or "I don't know." for an unlisted family.
    """
    revealed = match_field(question, list(hidden_profile))
    if revealed is not None:
        reply = format_answer(revealed, hidden_profile[revealed], family)
        return reply, revealed
    fallback = _NO_MATCH_REPLIES.get(family, "I don't know.")
    return fallback, None
# Public API of this module. The phrasing table, the fallback-reply table and
# _normalize are not listed and therefore stay private.
__all__ = [
"FIELD_KEYWORDS",
"match_field",
"format_answer",
"answer",
]
|