Nanny7 committed on
Commit
60af04f
·
1 Parent(s): b0fc11e

phase2 fix

Browse files
Dockerfile CHANGED
@@ -17,12 +17,7 @@ COPY requirements.txt .
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy application code
20
- COPY models.py .
21
- COPY dataset.py .
22
- COPY graders.py .
23
- COPY environment.py .
24
- COPY server.py .
25
- COPY openenv.yaml .
26
  COPY inference.py .
27
  COPY static/ ./static/
28
 
 
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy application code
20
+ COPY server/ ./server/
 
 
 
 
 
21
  COPY inference.py .
22
  COPY static/ ./static/
23
 
app.py CHANGED
@@ -6,3 +6,4 @@ We re-export from server.py.
6
  from server import app # noqa: F401
7
 
8
  # HF Spaces will pick up `app` and serve it on port 7860
 
 
6
  from server import app # noqa: F401
7
 
8
  # HF Spaces will pick up `app` and serve it on port 7860
9
+ if __name__ == "__main__": app.main()
inference.py CHANGED
@@ -1,109 +1,89 @@
1
  #!/usr/bin/env python3
2
-
3
  import os
4
- import sys
5
  import json
6
- from typing import Dict, Any
7
-
8
- from fastapi import FastAPI
9
- from pydantic import BaseModel
10
  from openai import OpenAI
11
 
12
- # ─── FastAPI App ─────────────────────────────────────────
13
- app = FastAPI()
14
 
15
- # ─── Environment Variables ───────────────────────────────
16
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
17
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
18
  HF_TOKEN = os.environ.get("HF_TOKEN", os.environ.get("OPENAI_API_KEY", ""))
19
 
20
- # ❗ Prevent crash if token missing
21
- if not HF_TOKEN:
22
- HF_TOKEN = "dummy-key"
23
-
24
  client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)
25
 
26
- # ─── Safe Imports (IMPORTANT FIX) ─────────────────────────
27
- sys.path.insert(0, os.path.dirname(__file__))
28
-
29
- try:
30
- from models import UrgencyLevel, EmailCategory, EmailAction
31
- except Exception:
32
- # fallback if import fails (prevents uvicorn crash)
33
- UrgencyLevel = EmailCategory = EmailAction = None
34
-
35
- # ─── Prompt ──────────────────────────────────────────────
36
- SYSTEM_PROMPT = """You are an expert email triage assistant.
37
-
38
- Return ONLY valid JSON with:
39
- - urgency
40
- - category
41
- - action
42
- - draft_reply (if reply)
43
- - forward_to (if forward/escalate)
44
- - reasoning
45
- """
46
-
47
- # ─── Request Schema ──────────────────────────────────────
48
- class InputData(BaseModel):
49
- input: Dict[str, Any]
50
-
51
- # ─── Helper Function ─────────────────────────────────────
52
- def clamp_enum(value: str, enum_cls):
53
- if enum_cls is None:
54
- return value # fallback if enums not available
55
-
56
- valid = {e.value for e in enum_cls}
57
- return value if value in valid else list(enum_cls)[0].value
58
-
59
- # ─── Agent Logic ─────────────────────────────────────────
60
- def agent_decide(email_data: Dict[str, Any]) -> Dict[str, Any]:
61
  try:
62
  response = client.chat.completions.create(
63
  model=MODEL_NAME,
64
  messages=[
65
  {"role": "system", "content": SYSTEM_PROMPT},
66
- {"role": "user", "content": json.dumps(email_data)},
67
  ],
68
- temperature=0.1,
69
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- raw = response.choices[0].message.content or "{}"
72
- return json.loads(raw)
73
-
74
- except Exception:
75
- return {
76
- "urgency": "medium",
77
- "category": "other",
78
- "action": "archive",
79
- "draft_reply": None,
80
- "forward_to": None,
81
- "reasoning": "fallback"
82
- }
83
-
84
- # ─── REQUIRED ENDPOINTS ──────────────────────────────────
85
-
86
- # ✅ FIXES YOUR ERROR
87
- @app.post("/reset")
88
- def reset():
89
- return {"status": "reset successful"}
90
-
91
-
92
- @app.post("/predict")
93
- def predict(data: InputData):
94
- email_data = data.input
95
-
96
- decision = agent_decide(email_data)
97
-
98
- urgency = clamp_enum(decision.get("urgency", "medium"), UrgencyLevel)
99
- category = clamp_enum(decision.get("category", "other"), EmailCategory)
100
- action = clamp_enum(decision.get("action", "archive"), EmailAction)
101
-
102
- return {
103
- "urgency": urgency,
104
- "category": category,
105
- "action": action,
106
- "draft_reply": decision.get("draft_reply"),
107
- "forward_to": decision.get("forward_to"),
108
- "reasoning": decision.get("reasoning", "")
109
- }
 
1
  #!/usr/bin/env python3
 
2
  import os
 
3
  import json
4
+ import time
 
 
 
5
  from openai import OpenAI
6
 
7
+ from server.environment import EmailTriageEnv
8
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
9
 
 
10
  API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
11
  MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
12
  HF_TOKEN = os.environ.get("HF_TOKEN", os.environ.get("OPENAI_API_KEY", ""))
13
 
 
 
 
 
14
  client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)
15
 
16
+ SYSTEM_PROMPT = "You are an email triage assistant. Return JSON."
17
+
18
+ def agent_decide(email):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  try:
20
  response = client.chat.completions.create(
21
  model=MODEL_NAME,
22
  messages=[
23
  {"role": "system", "content": SYSTEM_PROMPT},
24
+ {"role": "user", "content": json.dumps(email)},
25
  ],
 
26
  )
27
+ return json.loads(response.choices[0].message.content)
28
+ except:
29
+ return {"urgency": "medium", "category": "other", "action": "archive"}
30
+
31
+ def run_task(task_id):
32
+ env = EmailTriageEnv()
33
+ obs = env.reset(task_id=task_id)
34
+
35
+ steps = []
36
+ step_num = 0
37
+
38
+ while not obs.done:
39
+ step_num += 1
40
+ email = obs.current_email or {}
41
+
42
+ decision = agent_decide(email)
43
+
44
+ act = Action(
45
+ urgency=UrgencyLevel(decision.get("urgency", "medium")),
46
+ category=EmailCategory(decision.get("category", "other")),
47
+ action=EmailAction(decision.get("action", "archive")),
48
+ )
49
+
50
+ result = env.step(act)
51
+
52
+ print(json.dumps({
53
+ "type": "[STEP]",
54
+ "task_id": task_id,
55
+ "step": step_num,
56
+ "reward": result.reward.value,
57
+ "done": result.done
58
+ }))
59
+
60
+ steps.append(result.reward.value)
61
+ obs = result.observation
62
+
63
+ return sum(steps) / len(steps) if steps else 0
64
+
65
+ def main():
66
+ tasks = ["task_easy", "task_medium", "task_hard"]
67
+
68
+ print(json.dumps({
69
+ "type": "[START]",
70
+ "tasks": tasks,
71
+ "timestamp": time.time()
72
+ }))
73
+
74
+ results = {}
75
+
76
+ for t in tasks:
77
+ score = run_task(t)
78
+ results[t] = score
79
+
80
+ overall = sum(results.values()) / len(results)
81
+
82
+ print(json.dumps({
83
+ "type": "[END]",
84
+ "overall_score": overall,
85
+ "task_scores": results
86
+ }))
87
 
88
+ if __name__ == "__main__":
89
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
server.py DELETED
@@ -1,136 +0,0 @@
1
- from __future__ import annotations
2
- import os
3
- from typing import Any, Dict, Optional
4
-
5
- from fastapi import FastAPI, HTTPException, Request
6
- from fastapi.responses import HTMLResponse
7
- from fastapi.middleware.cors import CORSMiddleware
8
-
9
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
10
- from environment import EmailTriageEnv
11
-
12
- # ─── App setup ─────────────────────────────────────────────
13
-
14
- app = FastAPI(
15
- title="OpenEnv Email Triage",
16
- version="1.0.0",
17
- )
18
-
19
- app.add_middleware(
20
- CORSMiddleware,
21
- allow_origins=["*"],
22
- allow_methods=["*"],
23
- allow_headers=["*"],
24
- )
25
-
26
- env = EmailTriageEnv()
27
-
28
- # ─── Endpoints ─────────────────────────────────────────────
29
-
30
- @app.get("/health")
31
- async def health():
32
- return {"status": "healthy"}
33
-
34
- @app.get("/metadata")
35
- async def metadata():
36
- return {
37
- "name": "OpenEnv Email Triage",
38
- "description": "AI-powered email triage environment that classifies emails by urgency, category, and action."
39
- }
40
-
41
- @app.post("/mcp")
42
- async def mcp():
43
- return {
44
- "jsonrpc": "2.0",
45
- "result": {
46
- "message": "MCP endpoint active"
47
- },
48
- "id": 1
49
- }
50
- @app.get("/schema")
51
- async def schema():
52
- return {
53
- "action": {
54
- "urgency": [e.value for e in UrgencyLevel],
55
- "category": [e.value for e in EmailCategory],
56
- "action": [e.value for e in EmailAction],
57
- "draft_reply": "string (optional)",
58
- "forward_to": "string (optional)",
59
- "reasoning": "string (optional)"
60
- },
61
- "observation": {
62
- "current_email": "object",
63
- "done": "boolean",
64
- "info": "object"
65
- },
66
- "state": {
67
- "emails_processed": "int",
68
- "current_step": "int",
69
- "task_id": "string"
70
- }
71
- }
72
-
73
- # ✅ FIXED RESET (IMPORTANT)
74
- @app.post("/reset")
75
- async def reset(request: Request):
76
- try:
77
- body = await request.json()
78
- task_id = body.get("task_id", "task_easy") if body else "task_easy"
79
- except:
80
- task_id = "task_easy"
81
-
82
- obs = env.reset(task_id=task_id)
83
- return obs.model_dump()
84
-
85
-
86
- # ─── STEP ENDPOINT ─────────────────────────────────────────
87
-
88
- @app.post("/step")
89
- async def step(request: Request):
90
- try:
91
- data = await request.json()
92
-
93
- urgency = UrgencyLevel(data.get("urgency", "medium"))
94
- category = EmailCategory(data.get("category", "other"))
95
- action = EmailAction(data.get("action", "archive"))
96
-
97
- act = Action(
98
- urgency=urgency,
99
- category=category,
100
- action=action,
101
- draft_reply=data.get("draft_reply"),
102
- forward_to=data.get("forward_to"),
103
- reasoning=data.get("reasoning"),
104
- )
105
-
106
- result = env.step(act)
107
- return result.model_dump()
108
-
109
- except Exception as e:
110
- raise HTTPException(status_code=400, detail=str(e))
111
-
112
-
113
- # ─── OTHER ENDPOINTS ───────────────────────────────────────
114
-
115
- @app.get("/state")
116
- async def state():
117
- return env.state().model_dump()
118
-
119
-
120
- @app.get("/tasks")
121
- async def list_tasks():
122
- return {
123
- "tasks": [
124
- {"id": "task_easy"},
125
- {"id": "task_medium"},
126
- {"id": "task_hard"},
127
- ]
128
- }
129
-
130
-
131
- @app.get("/")
132
- async def root():
133
- return {"message": "OpenEnv Email Triage API running"}
134
-
135
- def main():
136
- return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
server/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (150 Bytes). View file
 
server/__pycache__/app.cpython-312.pyc ADDED
Binary file (5.38 kB). View file
 
server/__pycache__/dataset.cpython-312.pyc ADDED
Binary file (10.1 kB). View file
 
server/__pycache__/environment.cpython-312.pyc ADDED
Binary file (7.17 kB). View file
 
server/__pycache__/graders.cpython-312.pyc ADDED
Binary file (10.1 kB). View file
 
server/__pycache__/models.cpython-312.pyc ADDED
Binary file (6.73 kB). View file
 
server/app.py CHANGED
@@ -6,8 +6,8 @@ from fastapi import FastAPI, HTTPException, Request
6
  from fastapi.responses import HTMLResponse
7
  from fastapi.middleware.cors import CORSMiddleware
8
 
9
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
10
- from environment import EmailTriageEnv
11
 
12
  # ─── App setup ─────────────────────────────────────────────
13
 
@@ -135,5 +135,5 @@ async def root():
135
  def main():
136
  import uvicorn
137
  uvicorn.run(app)
138
-
139
- if __name__ == "__main__": main()
 
6
  from fastapi.responses import HTMLResponse
7
  from fastapi.middleware.cors import CORSMiddleware
8
 
9
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
10
+ from server.environment import EmailTriageEnv
11
 
12
  # ─── App setup ─────────────────────────────────────────────
13
 
 
135
  def main():
136
  import uvicorn
137
  uvicorn.run(app)
138
+
139
+ if __name__ == "__main__": main()
dataset.py → server/dataset.py RENAMED
@@ -2,7 +2,7 @@
2
  Email dataset for all three tasks.
3
  Each email has ground truth labels hidden from the agent.
4
  """
5
- from models import Email
6
 
7
  # ─── TASK 1: Easy — Binary Spam Detection (10 emails) ─────────────────────────
8
 
 
2
  Email dataset for all three tasks.
3
  Each email has ground truth labels hidden from the agent.
4
  """
5
+ from server.models import Email
6
 
7
  # ─── TASK 1: Easy — Binary Spam Detection (10 emails) ─────────────────────────
8
 
environment.py → server/environment.py RENAMED
@@ -5,11 +5,11 @@ Implements step() / reset() / state() interface.
5
  from __future__ import annotations
6
  import uuid
7
  from typing import Any, Dict, Optional, Tuple
8
- from models import (
9
  Action, Observation, Reward, StepResponse, EnvState, Email
10
  )
11
- from dataset import TASK_EMAILS
12
- from graders import grade
13
 
14
 
15
  class EmailTriageEnv:
 
5
  from __future__ import annotations
6
  import uuid
7
  from typing import Any, Dict, Optional, Tuple
8
+ from server.models import (
9
  Action, Observation, Reward, StepResponse, EnvState, Email
10
  )
11
+ from server.dataset import TASK_EMAILS
12
+ from server.graders import grade
13
 
14
 
15
  class EmailTriageEnv:
graders.py → server/graders.py RENAMED
@@ -5,7 +5,7 @@ and a human-readable breakdown.
5
  """
6
  from __future__ import annotations
7
  from typing import Optional
8
- from models import Action, Reward, RewardBreakdown, Email
9
 
10
 
11
  # ─── Urgency proximity map (partial credit for close guesses) ─────────────────
 
5
  """
6
  from __future__ import annotations
7
  from typing import Optional
8
+ from server.models import Action, Reward, RewardBreakdown, Email
9
 
10
 
11
  # ─── Urgency proximity map (partial credit for close guesses) ─────────────────
models.py → server/models.py RENAMED
File without changes
test_environment.py → server/test_environment.py RENAMED
@@ -12,27 +12,27 @@ import pytest
12
  # ─── These tests run after pip install -r requirements.txt ────────────────────
13
 
14
  def test_imports():
15
- from models import Observation, Action, Reward, StepResponse, EnvState
16
- from models import UrgencyLevel, EmailCategory, EmailAction
17
  assert UrgencyLevel.CRITICAL.value == "critical"
18
  assert EmailCategory.SPAM.value == "spam"
19
  assert EmailAction.DELETE.value == "delete"
20
 
21
 
22
  def test_reward_range():
23
- from models import Reward, RewardBreakdown
24
  r = Reward(value=0.75, feedback="ok")
25
  assert 0.0 <= r.value <= 1.0
26
 
27
 
28
  def test_reward_clamp():
29
- from models import Reward
30
  with pytest.raises(Exception):
31
  Reward(value=1.5, feedback="out of range")
32
 
33
 
34
  def test_reset_all_tasks():
35
- from environment import EmailTriageEnv
36
  env = EmailTriageEnv()
37
  for task_id in ["task_easy", "task_medium", "task_hard"]:
38
  obs = env.reset(task_id)
@@ -43,15 +43,15 @@ def test_reset_all_tasks():
43
 
44
 
45
  def test_reset_invalid_task():
46
- from environment import EmailTriageEnv
47
  env = EmailTriageEnv()
48
  with pytest.raises(ValueError):
49
  env.reset("task_nonexistent")
50
 
51
 
52
  def test_full_easy_episode():
53
- from environment import EmailTriageEnv
54
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
55
 
56
  env = EmailTriageEnv()
57
  obs = env.reset("task_easy")
@@ -78,8 +78,8 @@ def test_full_easy_episode():
78
 
79
 
80
  def test_step_after_done_raises():
81
- from environment import EmailTriageEnv
82
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
83
 
84
  env = EmailTriageEnv()
85
  env.reset("task_easy")
@@ -93,9 +93,9 @@ def test_step_after_done_raises():
93
 
94
 
95
  def test_perfect_spam_score():
96
- from graders import grade_task_easy
97
- from dataset import TASK_EASY_EMAILS
98
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
99
 
100
  # e001 is spam
101
  spam_email = next(e for e in TASK_EASY_EMAILS if e.id == "e001")
@@ -109,9 +109,9 @@ def test_perfect_spam_score():
109
 
110
 
111
  def test_missed_spam_penalty():
112
- from graders import grade_task_easy
113
- from dataset import TASK_EASY_EMAILS
114
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
115
 
116
  spam_email = next(e for e in TASK_EASY_EMAILS if e.id == "e001")
117
  act = Action(
@@ -125,8 +125,8 @@ def test_missed_spam_penalty():
125
 
126
 
127
  def test_state_reflects_progress():
128
- from environment import EmailTriageEnv
129
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
130
 
131
  env = EmailTriageEnv()
132
  env.reset("task_easy")
@@ -141,9 +141,9 @@ def test_state_reflects_progress():
141
 
142
 
143
  def test_reply_quality_grader():
144
- from graders import reply_quality_score
145
- from dataset import TASK_HARD_EMAILS
146
- from models import Email
147
 
148
  # h001 is a customer complaint — needs apology, resolution, etc.
149
  email = next(e for e in TASK_HARD_EMAILS if e.id == "h001")
@@ -162,16 +162,16 @@ def test_reply_quality_grader():
162
 
163
 
164
  def test_task_email_counts():
165
- from dataset import TASK_EMAILS
166
  assert len(TASK_EMAILS["task_easy"]) == 10
167
  assert len(TASK_EMAILS["task_medium"]) == 15
168
  assert len(TASK_EMAILS["task_hard"]) == 20
169
 
170
 
171
  def test_all_graders_return_valid_range():
172
- from graders import grade
173
- from dataset import TASK_EMAILS
174
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
175
 
176
  act = Action(urgency=UrgencyLevel.HIGH, category=EmailCategory.FINANCE, action=EmailAction.ESCALATE)
177
  for task_id, emails in TASK_EMAILS.items():
 
12
  # ─── These tests run after pip install -r requirements.txt ────────────────────
13
 
14
  def test_imports():
15
+ from server.models import Observation, Action, Reward, StepResponse, EnvState
16
+ from server.models import UrgencyLevel, EmailCategory, EmailAction
17
  assert UrgencyLevel.CRITICAL.value == "critical"
18
  assert EmailCategory.SPAM.value == "spam"
19
  assert EmailAction.DELETE.value == "delete"
20
 
21
 
22
  def test_reward_range():
23
+ from server.models import Reward, RewardBreakdown
24
  r = Reward(value=0.75, feedback="ok")
25
  assert 0.0 <= r.value <= 1.0
26
 
27
 
28
  def test_reward_clamp():
29
+ from server.models import Reward
30
  with pytest.raises(Exception):
31
  Reward(value=1.5, feedback="out of range")
32
 
33
 
34
  def test_reset_all_tasks():
35
+ from server.environment import EmailTriageEnv
36
  env = EmailTriageEnv()
37
  for task_id in ["task_easy", "task_medium", "task_hard"]:
38
  obs = env.reset(task_id)
 
43
 
44
 
45
  def test_reset_invalid_task():
46
+ from server.environment import EmailTriageEnv
47
  env = EmailTriageEnv()
48
  with pytest.raises(ValueError):
49
  env.reset("task_nonexistent")
50
 
51
 
52
  def test_full_easy_episode():
53
+ from server.environment import EmailTriageEnv
54
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
55
 
56
  env = EmailTriageEnv()
57
  obs = env.reset("task_easy")
 
78
 
79
 
80
  def test_step_after_done_raises():
81
+ from server.environment import EmailTriageEnv
82
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
83
 
84
  env = EmailTriageEnv()
85
  env.reset("task_easy")
 
93
 
94
 
95
  def test_perfect_spam_score():
96
+ from server.graders import grade_task_easy
97
+ from server.dataset import TASK_EASY_EMAILS
98
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
99
 
100
  # e001 is spam
101
  spam_email = next(e for e in TASK_EASY_EMAILS if e.id == "e001")
 
109
 
110
 
111
  def test_missed_spam_penalty():
112
+ from server.graders import grade_task_easy
113
+ from server.dataset import TASK_EASY_EMAILS
114
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
115
 
116
  spam_email = next(e for e in TASK_EASY_EMAILS if e.id == "e001")
117
  act = Action(
 
125
 
126
 
127
  def test_state_reflects_progress():
128
+ from server.environment import EmailTriageEnv
129
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
130
 
131
  env = EmailTriageEnv()
132
  env.reset("task_easy")
 
141
 
142
 
143
  def test_reply_quality_grader():
144
+ from server.graders import reply_quality_score
145
+ from server.dataset import TASK_HARD_EMAILS
146
+ from server.models import Email
147
 
148
  # h001 is a customer complaint — needs apology, resolution, etc.
149
  email = next(e for e in TASK_HARD_EMAILS if e.id == "h001")
 
162
 
163
 
164
  def test_task_email_counts():
165
+ from server.dataset import TASK_EMAILS
166
  assert len(TASK_EMAILS["task_easy"]) == 10
167
  assert len(TASK_EMAILS["task_medium"]) == 15
168
  assert len(TASK_EMAILS["task_hard"]) == 20
169
 
170
 
171
  def test_all_graders_return_valid_range():
172
+ from server.graders import grade
173
+ from server.dataset import TASK_EMAILS
174
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
175
 
176
  act = Action(urgency=UrgencyLevel.HIGH, category=EmailCategory.FINANCE, action=EmailAction.ESCALATE)
177
  for task_id, emails in TASK_EMAILS.items():
validate.py → server/validate.py RENAMED
@@ -62,7 +62,7 @@ except Exception as e:
62
  # ─── 3. Pydantic models ───────────────────────────────────────────────────────
63
  print("\n3. Typed models (Pydantic)")
64
  try:
65
- from models import Observation, Action, Reward, StepResponse, EnvState
66
  check(True, "Observation model imports")
67
  check(True, "Action model imports")
68
  check(True, "Reward model imports")
@@ -78,8 +78,8 @@ except Exception as e:
78
  # ─── 4. Environment API ───────────────────────────────────────────────────────
79
  print("\n4. Environment API (reset/step/state)")
80
  try:
81
- from environment import EmailTriageEnv
82
- from models import Action, UrgencyLevel, EmailCategory, EmailAction
83
 
84
  env = EmailTriageEnv()
85
 
@@ -118,8 +118,8 @@ except Exception as e:
118
  # ─── 5. Graders ───────────────────────────────────────────────────────────────
119
  print("\n5. Task graders (3 tasks, scores in [0,1])")
120
  try:
121
- from graders import grade
122
- from dataset import TASK_EMAILS
123
 
124
  for tid in ["task_easy", "task_medium", "task_hard"]:
125
  emails = TASK_EMAILS[tid]
 
62
  # ─── 3. Pydantic models ───────────────────────────────────────────────────────
63
  print("\n3. Typed models (Pydantic)")
64
  try:
65
+ from server.models import Observation, Action, Reward, StepResponse, EnvState
66
  check(True, "Observation model imports")
67
  check(True, "Action model imports")
68
  check(True, "Reward model imports")
 
78
  # ─── 4. Environment API ───────────────────────────────────────────────────────
79
  print("\n4. Environment API (reset/step/state)")
80
  try:
81
+ from server.environment import EmailTriageEnv
82
+ from server.models import Action, UrgencyLevel, EmailCategory, EmailAction
83
 
84
  env = EmailTriageEnv()
85
 
 
118
  # ─── 5. Graders ───────────────────────────────────────────────────────────────
119
  print("\n5. Task graders (3 tasks, scores in [0,1])")
120
  try:
121
+ from server.graders import grade
122
+ from server.dataset import TASK_EMAILS
123
 
124
  for tid in ["task_easy", "task_medium", "task_hard"]:
125
  emails = TASK_EMAILS[tid]