Vittal-M committed on
Commit
de36ac9
·
verified ·
1 Parent(s): 248269f

Upload tasks/task3_hard.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. tasks/task3_hard.py +71 -0
tasks/task3_hard.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task 3 — Schedule Repair (Hard).
2
+
3
+ The agent observes an infeasible scheduling instance and must return a
4
+ corrected schedule (JSON) that:
5
+ (a) is valid JSON with the required schema — 0.4 pts
6
+ (b) satisfies all scheduling constraints — 0.4 pts
7
+ (c) achieves a makespan within 30% of the known optimal — 0.2 pts
8
+
9
+ Partial progress: parseable JSON earns 0.2 base reward per step.
10
+ Max steps per episode: 8.
11
+ Expected agent accuracy: ~30%.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any
17
+
18
+ from environment import INSTANCE_BANK, SchedulingOptEnv
19
+ from models import Action
20
+
21
+ TASK_ID = "schedule_repair"
22
+ MAX_STEPS = 8
23
+ DIFFICULTY = "hard"
24
+
25
+
26
def run_episode(env: SchedulingOptEnv, agent_fn: Any) -> dict[str, Any]:
    """Drive one schedule-repair episode and summarize it.

    The environment is reset with ``task_id=TASK_ID``; then, for at most
    ``MAX_STEPS`` iterations, the current observation is handed to
    ``agent_fn``, its reply is wrapped in an ``Action`` and passed to
    ``env.step``. The loop ends early as soon as a step reports ``done``.

    Args:
        env: An initialized SchedulingOptEnv instance.
        agent_fn: Callable receiving an Observation, returning a JSON
            schedule string.

    Returns:
        A dict with the keys ``"task"``, ``"difficulty"``, ``"steps"``,
        ``"total_reward"`` (rounded to 4 decimal places) and ``"info"``
        (the info value from the last step taken, or an empty dict when
        no step was taken).
    """
    observation = env.reset(task_id=TASK_ID)
    cumulative_reward = 0.0
    step_count = 0
    last_info: dict[str, Any] = {}

    while step_count < MAX_STEPS:
        reply = agent_fn(observation)
        observation, reward, finished, last_info = env.step(
            Action(response=reply, task_id=TASK_ID)
        )
        cumulative_reward += reward
        step_count += 1
        if finished:
            # The environment signalled the end of the episode.
            break

    summary: dict[str, Any] = {
        "task": TASK_ID,
        "difficulty": DIFFICULTY,
        "steps": step_count,
        "total_reward": round(cumulative_reward, 4),
        "info": last_info,
    }
    return summary
57
+
58
+
59
def get_repairable_instances() -> list[dict[str, Any]]:
    """Collect the infeasible INSTANCE_BANK entries that carry an optimal schedule.

    An entry is kept when its ``"is_feasible"`` value is falsy and its
    ``"optimal_schedule"`` value is present and truthy.

    Returns:
        One dict per kept entry, holding only the ``"instance"``,
        ``"optimal_schedule"``, ``"optimal_makespan"``, ``"violation_type"``
        and ``"description"`` fields, in bank order.
    """
    exported_keys = (
        "instance",
        "optimal_schedule",
        "optimal_makespan",
        "violation_type",
        "description",
    )
    repairable: list[dict[str, Any]] = []
    for record in INSTANCE_BANK:
        # Skip feasible entries and those without a usable optimal schedule.
        if record["is_feasible"] or not record.get("optimal_schedule"):
            continue
        repairable.append({key: record[key] for key in exported_keys})
    return repairable