adityagirishh committed on
Commit
f2c3a0c
·
1 Parent(s): a0e5c66

submission draft 1

Browse files
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.14-slim
2
 
3
  ENV PYTHONDONTWRITEBYTECODE=1
4
  ENV PYTHONUNBUFFERED=1
 
1
+ FROM python:3.12-slim
2
 
3
  ENV PYTHONDONTWRITEBYTECODE=1
4
  ENV PYTHONUNBUFFERED=1
app/grader.py CHANGED
@@ -741,7 +741,7 @@ def grade(state: EpisodeState) -> Dict[str, Any]:
741
  mc, mr = score_crm_accuracy(log, must_update_crm)
742
  dims["crm_accuracy"] = {"score": mc, "reason": mr, "weight": WEIGHTS["crm_accuracy"]}
743
 
744
- pc, pr = score_compliance(s.leads, log, s.lead_responses)
745
  dims["compliance"] = {"score": pc, "reason": pr, "weight": WEIGHTS["compliance"]}
746
 
747
  ec, er = score_efficiency(s.step_count, s.max_steps, task_completed and pc > 0.0)
 
741
  mc, mr = score_crm_accuracy(log, must_update_crm)
742
  dims["crm_accuracy"] = {"score": mc, "reason": mr, "weight": WEIGHTS["crm_accuracy"]}
743
 
744
+ pc, pr, _fatal = score_compliance(s.leads, log, s.lead_responses)
745
  dims["compliance"] = {"score": pc, "reason": pr, "weight": WEIGHTS["compliance"]}
746
 
747
  ec, er = score_efficiency(s.step_count, s.max_steps, task_completed and pc > 0.0)
inference.py CHANGED
@@ -17,8 +17,12 @@ STDOUT FORMAT
17
  [END] success=<true|false> steps=<n> score=<0.000> rewards=<r1,r2,...,rn>
18
  """
19
 
 
 
20
  from __future__ import annotations
21
 
 
 
22
  import asyncio
23
  import json
24
  import os
@@ -162,7 +166,8 @@ Step {step_num}/{max_steps}
162
  Current observation:
163
  {json.dumps(observation, indent=2, default=str)}
164
 
165
- Based on this observation, choose your next action. Respond with a single JSON object.
 
166
  """).strip()
167
 
168
 
 
17
  [END] success=<true|false> steps=<n> score=<0.000> rewards=<r1,r2,...,rn>
18
  """
19
 
20
+
21
+
22
  from __future__ import annotations
23
 
24
+ from dotenv import load_dotenv
25
+ load_dotenv(".env.local")
26
  import asyncio
27
  import json
28
  import os
 
166
  Current observation:
167
  {json.dumps(observation, indent=2, default=str)}
168
 
169
+ Respond with ONLY a JSON object. No text before or after. Example format:
170
+ {{"action_type": "send_email", "target_lead_id": "L-001", "subject": "...", "body": "...", "goal": "...", "priority": "high"}}
171
  """).strip()
172
 
173
 
requirements.txt CHANGED
@@ -1,6 +1,110 @@
1
- fastapi>=0.104.0
2
- uvicorn[standard]>=0.24.0
3
- pydantic>=2.7.0
4
- requests>=2.25.0
5
- openai>=1.0.0
6
- openenv-core
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofile==3.9.0
2
+ aiofiles==24.1.0
3
+ annotated-doc==0.0.4
4
+ annotated-types==0.7.0
5
+ anyio==4.13.0
6
+ attrs==26.1.0
7
+ Authlib==1.6.9
8
+ beartype==0.22.9
9
+ brotli==1.2.0
10
+ cachetools==7.0.5
11
+ caio==0.9.25
12
+ certifi==2026.2.25
13
+ cffi==2.0.0
14
+ charset-normalizer==3.4.6
15
+ click==8.3.1
16
+ cryptography==46.0.6
17
+ cyclopts==4.10.1
18
+ distro==1.9.0
19
+ dnspython==2.8.0
20
+ docstring_parser==0.17.0
21
+ docutils==0.22.4
22
+ email-validator==2.3.0
23
+ exceptiongroup==1.3.1
24
+ fastapi==0.135.2
25
+ fastmcp==3.2.0
26
+ ffmpy==1.0.0
27
+ filelock==3.25.2
28
+ fsspec==2026.3.0
29
+ gradio==6.11.0
30
+ gradio_client==2.4.0
31
+ groovy==0.1.2
32
+ h11==0.16.0
33
+ hf-gradio==0.3.0
34
+ hf-xet==1.4.3
35
+ httpcore==1.0.9
36
+ httptools==0.7.1
37
+ httpx==0.28.1
38
+ httpx-sse==0.4.3
39
+ huggingface_hub==1.9.0
40
+ idna==3.11
41
+ importlib_metadata==8.7.1
42
+ iniconfig==2.3.0
43
+ jaraco.classes==3.4.0
44
+ jaraco.context==6.1.2
45
+ jaraco.functools==4.4.0
46
+ Jinja2==3.1.6
47
+ jiter==0.13.0
48
+ jsonref==1.1.0
49
+ jsonschema==4.26.0
50
+ jsonschema-path==0.4.5
51
+ jsonschema-specifications==2025.9.1
52
+ keyring==25.7.0
53
+ markdown-it-py==4.0.0
54
+ MarkupSafe==3.0.3
55
+ mcp==1.27.0
56
+ mdurl==0.1.2
57
+ more-itertools==11.0.1
58
+ numpy==2.4.4
59
+ openai==2.30.0
60
+ openapi-pydantic==0.5.1
61
+ openenv-core==0.2.3
62
+ opentelemetry-api==1.40.0
63
+ orjson==3.11.8
64
+ packaging==26.0
65
+ pandas==3.0.2
66
+ pathable==0.5.0
67
+ pillow==12.2.0
68
+ platformdirs==4.9.4
69
+ pluggy==1.6.0
70
+ py-key-value-aio==0.4.4
71
+ pycparser==3.0
72
+ pydantic==2.12.5
73
+ pydantic-settings==2.13.1
74
+ pydantic_core==2.41.5
75
+ pydub==0.25.1
76
+ Pygments==2.20.0
77
+ PyJWT==2.12.1
78
+ pyperclip==1.11.0
79
+ pytest==9.0.2
80
+ python-dateutil==2.9.0.post0
81
+ python-dotenv==1.2.2
82
+ python-multipart==0.0.22
83
+ pytz==2026.1.post1
84
+ PyYAML==6.0.3
85
+ referencing==0.37.0
86
+ requests==2.33.1
87
+ rich==14.3.3
88
+ rich-rst==1.3.2
89
+ rpds-py==0.30.0
90
+ safehttpx==0.1.7
91
+ semantic-version==2.10.0
92
+ shellingham==1.5.4
93
+ six==1.17.0
94
+ sniffio==1.3.1
95
+ sse-starlette==3.3.4
96
+ starlette==1.0.0
97
+ tomli==2.4.1
98
+ tomli_w==1.2.0
99
+ tomlkit==0.13.3
100
+ tqdm==4.67.3
101
+ typer==0.24.1
102
+ typing-inspection==0.4.2
103
+ typing_extensions==4.15.0
104
+ uncalled-for==0.2.0
105
+ urllib3==2.6.3
106
+ uvicorn==0.42.0
107
+ uvloop==0.22.1
108
+ watchfiles==1.1.1
109
+ websockets==16.0
110
+ zipp==3.23.0
tests/test_api.py CHANGED
@@ -92,18 +92,19 @@ class TestStateEndpoint:
92
 
93
 
94
  class TestTasksEndpoint:
95
- def test_tasks_returns_5(self):
96
  resp = client.get("/tasks")
97
  assert resp.status_code == 200
98
  data = resp.json()
99
  assert "tasks" in data
100
- assert len(data["tasks"]) == 5
101
  task_ids = [t["task_id"] for t in data["tasks"]]
102
  assert "easy" in task_ids
103
  assert "medium" in task_ids
104
  assert "hard" in task_ids
105
  assert "escalation" in task_ids
106
  assert "consent" in task_ids
 
107
 
108
 
109
  class TestGraderEndpoint:
 
92
 
93
 
94
  class TestTasksEndpoint:
95
+ def test_tasks_returns_all(self):
96
  resp = client.get("/tasks")
97
  assert resp.status_code == 200
98
  data = resp.json()
99
  assert "tasks" in data
100
+ assert len(data["tasks"]) == 6
101
  task_ids = [t["task_id"] for t in data["tasks"]]
102
  assert "easy" in task_ids
103
  assert "medium" in task_ids
104
  assert "hard" in task_ids
105
  assert "escalation" in task_ids
106
  assert "consent" in task_ids
107
+ assert "adversarial" in task_ids
108
 
109
 
110
  class TestGraderEndpoint:
tests/test_grader.py CHANGED
@@ -35,6 +35,13 @@ class TestGraderEasy:
35
  goal="intro",
36
  priority="high",
37
  ),
 
 
 
 
 
 
 
38
  ])
39
  assert result["pass"] is True
40
  assert result["score"] >= 0.60
@@ -119,27 +126,21 @@ class TestGraderHard:
119
  body="Hi Rajan, following up on the proposal for Apex Ventures.",
120
  goal="proposal_followup", priority="high",
121
  ),
 
 
 
 
 
122
  Action(
123
  action_type="request_documents", target_lead_id="L-007",
124
  subject="Documents — Apex Ventures",
125
  body="Hi Rajan, need the signed NDA and procurement form.",
126
  goal="get_documents",
127
  ),
128
- Action(
129
- action_type="send_message", target_lead_id="L-007",
130
- body="Hi Rajan — sent the docs request via email. Let me know.",
131
- goal="keep_warm",
132
- ),
133
  Action(
134
  action_type="update_crm", target_lead_id="L-007",
135
  goal="log_interaction",
136
- metadata={"note": "Follow-up done. Docs requested. Targeting close."},
137
- ),
138
- Action(
139
- action_type="make_call", target_lead_id="L-007",
140
- body="Hi Rajan, documents received — ready to move forward?",
141
- goal="close_deal",
142
- metadata={"docs_received": True},
143
  ),
144
  ])
145
  assert result["pass"] is True
 
35
  goal="intro",
36
  priority="high",
37
  ),
38
+ Action(
39
+ action_type="request_documents",
40
+ target_lead_id="L-001",
41
+ subject="Documents — NovaTech",
42
+ body="Hi Arun, could you share the NDA and any pending documents so we can proceed?",
43
+ goal="get_documents",
44
+ ),
45
  ])
46
  assert result["pass"] is True
47
  assert result["score"] >= 0.60
 
126
  body="Hi Rajan, following up on the proposal for Apex Ventures.",
127
  goal="proposal_followup", priority="high",
128
  ),
129
+ Action(
130
+ action_type="send_message", target_lead_id="L-008",
131
+ body="Hi Sunita, reviewing the contract terms for CloudMatrix — happy to walk through each clause.",
132
+ goal="handle_terms_objection", priority="high",
133
+ ),
134
  Action(
135
  action_type="request_documents", target_lead_id="L-007",
136
  subject="Documents — Apex Ventures",
137
  body="Hi Rajan, need the signed NDA and procurement form.",
138
  goal="get_documents",
139
  ),
 
 
 
 
 
140
  Action(
141
  action_type="update_crm", target_lead_id="L-007",
142
  goal="log_interaction",
143
+ metadata={"note": "L-007: call + docs requested. L-008: terms objection handled."},
 
 
 
 
 
 
144
  ),
145
  ])
146
  assert result["pass"] is True
tests/test_rewards.py CHANGED
@@ -103,7 +103,7 @@ class TestMessageQuality:
103
  product = make_product()
104
  action = Action(action_type="send_email", target_lead_id="L-001", body="Hello there")
105
  score, _ = reward_message_quality(action, lead, product)
106
- assert score >= 0.05
107
 
108
  def test_personalised_body(self):
109
  lead = make_lead()
@@ -113,18 +113,20 @@ class TestMessageQuality:
113
  body="Hi Arun, reaching out about multi-channel outreach for NovaTech"
114
  )
115
  score, _ = reward_message_quality(action, lead, product)
116
- assert abs(score - 0.15) < 1e-9 # body + personalised + product-relevant (no objection to address)
117
 
118
 
119
  class TestCrmUpdate:
120
  def test_crm_update_action(self):
 
121
  action = Action(action_type="update_crm", target_lead_id="L-001")
122
- score, _ = reward_crm_update(action)
123
- assert score == 0.1
124
 
125
  def test_non_crm_action(self):
 
126
  action = Action(action_type="send_email", target_lead_id="L-001")
127
- score, _ = reward_crm_update(action)
128
  assert score == 0.0
129
 
130
 
@@ -170,23 +172,21 @@ class TestDuplicateOutreach:
170
  assert score == 0.0
171
 
172
  def test_duplicate(self):
173
- # New API: duplicate detection uses prev_action_type + prev_goal, not lead history
174
  lead = make_lead()
175
  action = Action(action_type="send_email", target_lead_id="L-001", goal="intro")
176
  score, _ = reward_duplicate_outreach(
177
  action, lead,
178
  prev_action_type="send_email",
179
- prev_goal="intro",
180
  )
181
  assert score == -0.2
182
 
183
- def test_different_goal_not_duplicate(self):
184
  lead = make_lead()
185
- action = Action(action_type="send_email", target_lead_id="L-001", goal="get_documents")
186
  score, _ = reward_duplicate_outreach(
187
  action, lead,
188
  prev_action_type="send_email",
189
- prev_goal="intro",
190
  )
191
  assert score == 0.0
192
 
 
103
  product = make_product()
104
  action = Action(action_type="send_email", target_lead_id="L-001", body="Hello there")
105
  score, _ = reward_message_quality(action, lead, product)
106
+ assert score >= 0.04
107
 
108
  def test_personalised_body(self):
109
  lead = make_lead()
 
113
  body="Hi Arun, reaching out about multi-channel outreach for NovaTech"
114
  )
115
  score, _ = reward_message_quality(action, lead, product)
116
+ assert abs(score - 0.16) < 1e-9 # body + personalised + product-relevant + tone (no objection to address)
117
 
118
 
119
  class TestCrmUpdate:
120
  def test_crm_update_action(self):
121
+ lead = make_lead()
122
  action = Action(action_type="update_crm", target_lead_id="L-001")
123
+ score, _ = reward_crm_update(action, lead)
124
+ assert score == 0.15
125
 
126
  def test_non_crm_action(self):
127
+ lead = make_lead()
128
  action = Action(action_type="send_email", target_lead_id="L-001")
129
+ score, _ = reward_crm_update(action, lead)
130
  assert score == 0.0
131
 
132
 
 
172
  assert score == 0.0
173
 
174
  def test_duplicate(self):
175
+ # Duplicate detection: same lead + same channel back-to-back
176
  lead = make_lead()
177
  action = Action(action_type="send_email", target_lead_id="L-001", goal="intro")
178
  score, _ = reward_duplicate_outreach(
179
  action, lead,
180
  prev_action_type="send_email",
 
181
  )
182
  assert score == -0.2
183
 
184
+ def test_different_channel_not_duplicate(self):
185
  lead = make_lead()
186
+ action = Action(action_type="make_call", target_lead_id="L-001", goal="get_documents")
187
  score, _ = reward_duplicate_outreach(
188
  action, lead,
189
  prev_action_type="send_email",
 
190
  )
191
  assert score == 0.0
192