Vittal-M committed on
Commit
a077e4a
·
verified ·
1 Parent(s): 3a6ed5b

Upload tasks/task1_easy.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. tasks/task1_easy.py +68 -0
tasks/task1_easy.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task 1 — Feasibility Check (Easy).
2
+
3
+ The agent observes a scheduling instance (jobs, machines, proposed assignments)
4
+ and must respond with "feasible" or "infeasible" to indicate whether all
5
+ scheduling constraints are satisfied.
6
+
7
+ Grading: exact match — 1.0 if correct, 0.1 if wrong, 0.0 if empty.
8
+ Max steps per episode: 3.
9
+ Expected agent accuracy: ~90%.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from environment import INSTANCE_BANK, SchedulingOptEnv
17
+ from graders.grader_detection import FeasibilityGrader
18
+ from models import Action
19
+
20
# Task identifier handed to env.reset() and attached to every Action built by run_episode.
+ TASK_ID = "feasibility_check"
21
# Upper bound on the number of agent/environment exchanges in one episode.
+ MAX_STEPS = 3
22
# Difficulty label echoed back in the episode summary dict.
+ DIFFICULTY = "easy"
23
+
24
+
25
def run_episode(env: SchedulingOptEnv, agent_fn: Any) -> dict[str, Any]:
    """Play one feasibility-check episode and summarise the outcome.

    The environment is reset for ``TASK_ID``; then, for at most
    ``MAX_STEPS`` turns, the agent is asked for a response to the current
    observation, the response is wrapped in an ``Action`` and submitted to
    the environment, and the returned reward is accumulated. The loop ends
    early as soon as the environment reports ``done``.

    Args:
        env: An initialized SchedulingOptEnv instance.
        agent_fn: A callable that takes the current observation and returns
            the response string ("feasible" or "infeasible").

    Returns:
        A dict with the keys ``"task"``, ``"difficulty"``, ``"steps"``
        (number of turns actually played), ``"total_reward"`` (sum of step
        rewards rounded to 4 decimals) and ``"info"`` (the info value from
        the last step, or an empty dict if no step was taken).
    """
    observation = env.reset(task_id=TASK_ID)
    reward_sum = 0.0
    turns_played = 0
    last_info: dict[str, Any] = {}

    # The loop variable doubles as the step counter: it holds the 1-based
    # index of the most recent turn, and stays 0 if the loop never runs.
    for turn in range(1, MAX_STEPS + 1):
        answer = agent_fn(observation)
        observation, step_reward, finished, last_info = env.step(
            Action(response=answer, task_id=TASK_ID)
        )
        reward_sum += step_reward
        turns_played = turn
        if finished:
            break

    summary: dict[str, Any] = {
        "task": TASK_ID,
        "difficulty": DIFFICULTY,
        "steps": turns_played,
        "total_reward": round(reward_sum, 4),
        "info": last_info,
    }
    return summary
57
+
58
+
59
def get_all_instances_with_answers() -> list[dict[str, Any]]:
    """Project every INSTANCE_BANK entry onto the feasibility-check fields.

    Returns:
        One dict per entry in ``INSTANCE_BANK``, in bank order, holding only
        the ``"instance"``, ``"is_feasible"`` and ``"description"`` values
        copied from that entry.
    """
    wanted_keys = ("instance", "is_feasible", "description")
    projected: list[dict[str, Any]] = []
    for bank_entry in INSTANCE_BANK:
        projected.append({key: bank_entry[key] for key in wanted_keys})
    return projected