File size: 5,564 Bytes
2414d31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
Rule-based user simulator for ClarifyRL.

Given a free-text clarifying question + the hidden profile + the task family,
return a short natural-language answer and the profile field that was revealed
(or None if the question didn't match any field the user knows).

Pure-Python, deterministic, sub-millisecond. No LLM call.
"""

from __future__ import annotations

import re
from typing import Any, Optional


# Maps each profile field name to the lowercase phrases that signal a question
# is asking about that field. match_field() looks these phrases up in the
# normalized (lower-cased, whitespace-collapsed) question and, among the
# fields it is allowed to consider, picks the one owning the longest phrase
# that matched.
#
# The blank-line-separated groups below presumably correspond to the five task
# families keyed in _NO_MATCH_REPLIES — TODO confirm against the profile
# generator.
FIELD_KEYWORDS: dict[str, list[str]] = {
    # Software-requirements fields (presumably the coding_requirements family).
    "stack": ["stack", "language", "framework", "tech", "what to build it in"],
    "scale": ["scale", "users", "traffic", "load", "rps", "concurrent"],
    "auth": ["auth", "authentication", "login", "sso", "jwt", "oauth"],
    "datastore": ["database", "db", "storage", "persist", "data store"],
    "deployment_target": ["deploy", "host", "hosting", "cloud", "aws", "kubernetes", "where to run"],
    "language_version": ["version", "python version", "node version", "runtime version"],
    "test_coverage": ["test", "coverage", "testing", "qa"],

    # Patient-intake fields (presumably the medical_intake family).
    "primary_symptom": ["symptom", "what's wrong", "what hurts", "how do you feel", "what is the issue"],
    "duration": ["how long", "since when", "duration", "when did", "started"],
    "severity": ["severe", "mild", "intense", "how bad", "severity", "scale of pain"],
    "age_band": ["age", "how old", "young", "elderly", "child", "adult"],
    "prior_conditions": ["history", "prior condition", "medical history", "pre-existing", "chronic"],
    "medications": ["medication", "meds", "drugs", "prescription", "taking anything"],

    # Customer-support fields (presumably the support_triage family).
    "order_id": ["order id", "order number", "order #", "reference", "tracking", "which order"],
    "item_issue": ["what's wrong with", "what happened", "damaged", "missing", "wrong", "issue with the order", "problem with"],
    "refund_or_replace": ["refund", "replace", "return", "credit", "what would you like", "resolution"],
    "urgency": ["when do you need", "need by", "urgent", "asap", "how soon", "urgency"],
    "channel_preferred": ["contact", "reach you", "email or phone", "how should we", "channel"],

    # Meeting fields (presumably the meeting_scheduling family).
    # NOTE(review): "when (day)" and "where (online)" contain their
    # parentheses literally, so they only match a question that includes that
    # exact text — confirm these are intended as keywords rather than notes.
    "participants": ["who", "participants", "attend", "join", "invite", "attendees"],
    "date": ["what day", "which day", "date", "when (day)", "what date"],
    "time": ["what time", "which time", "hour", "morning or afternoon"],
    # "how long" and "duration" are also listed under "duration" above; when
    # both fields are candidates and the lengths tie, match_field() keeps the
    # field that comes first in the key list it was given.
    "duration_minutes": ["how long", "duration", "minutes", "length"],
    "platform": ["zoom", "platform", "in person", "in-person", "where (online)", "virtual or"],

    # Event fields (presumably the event_planning family).
    "event_type": ["what kind of event", "kind", "type of event", "occasion"],
    "guest_count": ["how many", "guest", "headcount", "size", "people"],
    "venue": ["where", "venue", "location", "place"],
    "budget_band": ["budget", "cost", "spend", "price", "how much"],
    "theme": ["theme", "vibe", "style", "formal or casual"],
    "dietary_constraints": ["diet", "vegetarian", "vegan", "food restriction", "allergies", "dietary"],
}


# str.format templates used by format_answer() to turn a profile value into a
# short reply. Each template has a single "{value}" placeholder. A field with
# no entry here is rendered as the bare value.
#
# NOTE(review): several templates hard-code the article "a" ("It's a",
# "I'm a", "A", "At a", "I'd prefer a"), so the wording does not adapt to
# values that begin with a vowel sound.
_FIELD_PHRASING: dict[str, str] = {
    "stack": "I'd like to use {value}",
    "scale": "Expecting around {value}",
    "auth": "Auth should be {value}",
    "datastore": "Use {value}",
    "deployment_target": "Deploy to {value}",
    "language_version": "Use {value}",
    "test_coverage": "{value} tests",

    "primary_symptom": "It's a {value}",
    "duration": "About {value}",
    "severity": "I'd say {value}",
    "age_band": "I'm a {value}",
    "prior_conditions": "{value}",
    "medications": "{value}",

    "order_id": "Order {value}",
    "item_issue": "{value}",
    "refund_or_replace": "I'd prefer a {value}",
    "urgency": "Urgency is {value}",
    "channel_preferred": "Please reach me by {value}",

    "participants": "{value}",
    "date": "{value}",
    "time": "{value}",
    "duration_minutes": "{value} minutes",
    "platform": "{value}",

    "event_type": "A {value}",
    "guest_count": "About {value} people",
    "venue": "At a {value}",
    "budget_band": "Budget around {value}",
    "theme": "{value}",
    "dietary_constraints": "{value}",
}


# Reply that answer() returns, keyed by task family, when the question does
# not match any field in the hidden profile. A family missing from this table
# gets the generic "I don't know." fallback inside answer().
_NO_MATCH_REPLIES: dict[str, str] = {
    "coding_requirements": "I don't have a strong preference on that — pick something reasonable.",
    "medical_intake": "I'm not sure about that, sorry.",
    "support_triage": "I don't really know — does it matter?",
    "meeting_scheduling": "No preference, you choose.",
    "event_planning": "Up to you on that one.",
}


def _normalize(text: str) -> str:
    """Lower-case *text* and collapse each whitespace run into one space.

    Leading and trailing whitespace is dropped, so an all-whitespace or empty
    input yields the empty string.
    """
    lowered = text.lower()
    tokens = lowered.split()  # split() with no argument also trims both ends
    return " ".join(tokens)


def match_field(question: str, allowed_keys: list[str]) -> Optional[str]:
    """Return the profile field whose keyword best matches *question*.

    The question is normalized (lower-cased, whitespace collapsed) and every
    keyword registered in ``FIELD_KEYWORDS`` for the fields in *allowed_keys*
    is searched for in it. A keyword only counts when it begins at the start
    of a word: ``"age"`` does not fire on "language" and ``"date"`` does not
    fire on "update". Trailing characters are still permitted, so ``"deploy"``
    matches "deployment" and ``"guest"`` matches "guests".

    Args:
        question: Free-text clarifying question.
        allowed_keys: Field names to consider. Names without an entry in
            ``FIELD_KEYWORDS`` are ignored.

    Returns:
        The field owning the longest matching keyword, or ``None`` when no
        keyword matches. When two keywords of equal length match, the field
        that appears first in *allowed_keys* wins.
    """
    q = _normalize(question)
    best_score = -1
    best_field: Optional[str] = None
    for field_key in allowed_keys:
        for kw in FIELD_KEYWORDS.get(field_key, ()):
            # An empty keyword would match every question; a keyword that is
            # not strictly longer than the current best cannot replace it.
            if not kw or len(kw) <= best_score:
                continue
            # (?<!\w) requires that no word character precedes the keyword,
            # which rejects hits buried inside an unrelated word.
            if re.search(r"(?<!\w)" + re.escape(kw), q):
                best_score = len(kw)
                best_field = field_key
    return best_field


def format_answer(field_key: str, value: Any, family: str) -> str:
    """Render *value* as a short sentence for the given profile field.

    Args:
        field_key: Profile field being answered; selects the template from
            ``_FIELD_PHRASING``. Unknown fields use the bare value.
        value: Profile value substituted for ``{value}`` in the template.
        family: Task family. Not used by the current implementation.

    Returns:
        The formatted reply with surrounding whitespace stripped, ending in
        ".", "!" or "?" (a "." is appended when none is present).
    """
    del family  # unused here; discarded explicitly
    template = _FIELD_PHRASING.get(field_key, "{value}")
    reply = template.format(value=value).strip()
    if reply.endswith((".", "!", "?")):
        return reply
    return reply + "."


def answer(
    question: str,
    hidden_profile: dict[str, Any],
    family: str,
) -> tuple[str, Optional[str]]:
    """Simulate the user's reply to one clarifying question.

    Args:
        question: Free-text question asked of the simulated user.
        hidden_profile: Field name -> value mapping the user "knows". Only
            these fields are candidates for matching.
        family: Task family; selects the fallback reply when nothing matches
            and is forwarded to ``format_answer``.

    Returns:
        ``(reply, field)`` where *field* is the profile key that was revealed,
        or ``(fallback_reply, None)`` when the question matched no field.
    """
    revealed = match_field(question, list(hidden_profile))
    if revealed is not None:
        reply = format_answer(revealed, hidden_profile[revealed], family)
        return reply, revealed
    # Nothing matched: use the family's canned reply, or the generic one.
    fallback = _NO_MATCH_REPLIES.get(family, "I don't know.")
    return fallback, None


# Public API of this module. The underscore-prefixed tables and helpers
# (_FIELD_PHRASING, _NO_MATCH_REPLIES, _normalize) are intentionally left out.
__all__ = [
    "FIELD_KEYWORDS",
    "match_field",
    "format_answer",
    "answer",
]