Roshan818 committed on
Commit
6da70f4
·
1 Parent(s): a0f94c2

fix: self-contained grader + remove FACTORY_TASK from Dockerfile

Browse files

- grader.py: fully self-contained (stdlib only), no pydantic/openenv needed.
Tries FactoryEnv import first; falls back to identical inline RL simulation
if imports unavailable. Same scores in both paths (easy=0.9, medium=0.557,
hard=0.457).
- Dockerfile: remove ENV FACTORY_TASK=easy so inference.py always runs all 3
tasks. Server already defaults to 'easy' via os.getenv fallback in server/app.py.

Files changed (2) hide show
  1. Dockerfile +0 -3
  2. grader.py +188 -34
Dockerfile CHANGED
@@ -12,9 +12,6 @@ ENV API_BASE_URL=https://router.huggingface.co/v1
12
  ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
13
  ENV PORT=7860
14
 
15
- # Factory task difficulty (easy | medium | hard)
16
- ENV FACTORY_TASK=easy
17
-
18
  # Enable built-in Gradio web UI at /web (with redirect from /)
19
  ENV ENABLE_WEB_INTERFACE=1
20
 
 
12
  ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
13
  ENV PORT=7860
14
 
 
 
 
15
  # Enable built-in Gradio web UI at /web (with redirect from /)
16
  ENV ENABLE_WEB_INTERFACE=1
17
 
grader.py CHANGED
@@ -4,16 +4,131 @@ Graders for Smart Factory Scheduling tasks.
4
  Each public function:
5
  - Accepts an optional state/env argument to score a finished episode.
6
  - When called with no argument, runs a deterministic heuristic episode
7
- on the real FactoryEnv and returns the score.
8
  - Always returns a float strictly in (0.0, 1.0).
 
 
 
 
9
  """
10
 
11
  from __future__ import annotations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
  # ── Score formula ─────────────────────────────────────────────────────────────
15
 
16
- def _compute(completed: int, on_time: int, total: int, late: int) -> float:
17
  if total == 0:
18
  return 0.001
19
  score = (
@@ -24,62 +139,101 @@ def _compute(completed: int, on_time: int, total: int, late: int) -> float:
24
  return round(max(0.001, min(0.999, score)), 4)
25
 
26
 
27
- def _score_obj(obj) -> float:
 
 
 
 
 
 
 
 
28
  """Score from a finished FactoryEnv object or state dict."""
29
  if isinstance(obj, dict):
30
  done_list = obj.get("completed_jobs", []) or []
31
- pend_list = obj.get("pending_jobs", []) or []
32
- late = int(obj.get("late_jobs", 0) or 0)
33
- t = int(obj.get("time", 0) or 0)
34
  completed = len(done_list)
35
- total = completed + len(pend_list)
36
- on_time = sum(
37
  1 for j in done_list
38
  if (j.get("deadline", 0) if isinstance(j, dict)
39
  else getattr(j, "deadline", 0)) >= t
40
  )
41
  else:
42
  done_list = list(getattr(obj, "completed_jobs", []) or [])
43
- pend_list = list(getattr(obj, "jobs", getattr(obj, "pending_jobs", [])) or [])
44
- late = int(getattr(obj, "late_jobs", 0) or 0)
45
- t = int(getattr(obj, "time", 0) or 0)
 
46
  completed = len(done_list)
47
- total = completed + len(pend_list)
48
- on_time = sum(1 for j in done_list if getattr(j, "deadline", 0) >= t)
49
  return _compute(completed, on_time, total, late)
50
 
51
 
52
  # ── Heuristic agent ───────────────────────────────────────────────────────────
53
 
54
- def _heuristic(obs):
55
- """Earliest-deadline-first heuristic that runs on a FactoryObservation."""
56
- from factory_env.models import FactoryAction
57
- for m in obs.machines:
58
  if m.status == "broken":
59
- return FactoryAction(action_type="repair", machine_id=m.id)
60
- for j in sorted(obs.pending_jobs, key=lambda x: (x.deadline, -x.priority)):
61
- for m in obs.machines:
62
  if m.status == "idle":
63
- return FactoryAction(action_type="assign_job",
64
- job_id=j.id, machine_id=m.id)
65
- return None
66
 
67
 
68
  # ── Episode runner ────────────────────────────────────────────────────────────
69
 
70
- def _run_episode(task: str, seed: int = 42) -> float:
71
- """Run a full heuristic episode on FactoryEnv and return the graded score."""
72
- from factory_env.env import FactoryEnv
73
- from factory_env.models import FactoryAction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- env = FactoryEnv(task=task, seed=seed)
76
- obs = env.reset()
77
- for _ in range(obs.max_steps):
78
- if obs.done:
 
 
79
  break
80
- action = _heuristic(obs) or FactoryAction(action_type="wait")
81
- obs = env.step(action)
82
- return _score_obj(env)
83
 
84
 
85
  # ── Public graders ────────────────────────────────────────────────────────────
 
4
  Each public function:
5
  - Accepts an optional state/env argument to score a finished episode.
6
  - When called with no argument, runs a deterministic heuristic episode
7
+ and returns the score.
8
  - Always returns a float strictly in (0.0, 1.0).
9
+
10
+ This module is fully self-contained (stdlib only) so it works in any
11
+ Python 3.8+ environment regardless of what packages are installed.
12
+ The simulation implements the exact same RL dynamics as FactoryEnv.
13
  """
14
 
15
  from __future__ import annotations
16
+ import random
17
+
18
+
19
+ # ── Minimal RL simulation (identical dynamics to FactoryEnv) ─────────────────
20
+
21
# Scenario parameters for each difficulty level, keyed by task name.
# The two "range"/"slack" entries are inclusive (low, high) bounds that are
# unpacked straight into ``random.Random.randint``.
TASKS = {
    "easy": dict(
        num_machines=2,
        num_jobs=3,
        failure_rate=0.0,
        max_priority=1,
        job_time_range=(2, 5),
        deadline_slack=(4, 8),
        max_steps=20,
    ),
    "medium": dict(
        num_machines=4,
        num_jobs=7,
        failure_rate=0.08,
        max_priority=2,
        job_time_range=(3, 7),
        deadline_slack=(2, 5),
        max_steps=30,
    ),
    "hard": dict(
        num_machines=6,
        num_jobs=12,
        failure_rate=0.15,
        max_priority=3,
        job_time_range=(3, 8),
        deadline_slack=(1, 4),
        max_steps=40,
    ),
}
38
+
39
+
40
class _Machine:
    """Mutable record describing one machine in the stdlib-only simulation.

    A freshly built machine is ``"idle"`` and has no job attached.
    """

    __slots__ = ("id", "status", "current_job", "failure_rate")

    def __init__(self, id, failure_rate=0.0):
        # Caller-supplied identity and breakdown probability.
        self.id, self.failure_rate = id, failure_rate
        # Every machine starts free.
        self.status, self.current_job = "idle", None
48
+
49
+
50
class _Job:
    """Mutable record describing one job in the stdlib-only simulation.

    A freshly built job is not assigned to any machine.
    """

    __slots__ = ("id", "remaining_time", "deadline", "priority", "assigned_machine")

    def __init__(self, id, remaining_time, deadline, priority=1):
        self.id, self.priority = id, priority
        self.remaining_time, self.deadline = remaining_time, deadline
        # No machine has picked this job up yet.
        self.assigned_machine = None
59
+
60
+
61
class _Env:
    """Stdlib-only factory simulation used when FactoryEnv is unavailable."""

    def __init__(self, task="easy", seed=42):
        settings = TASKS[task]
        rng = random.Random(seed)

        self.machines = [
            _Machine(f"M{n}", settings["failure_rate"])
            for n in range(1, settings["num_machines"] + 1)
        ]

        # Draw order per job is fixed (duration, slack, priority) so that a
        # given seed always produces the same job set.
        self.jobs = []
        for n in range(1, settings["num_jobs"] + 1):
            duration = rng.randint(*settings["job_time_range"])
            slack = rng.randint(*settings["deadline_slack"])
            priority = rng.randint(1, settings["max_priority"])
            self.jobs.append(_Job(f"J{n}", duration, duration + slack, priority))

        self.completed_jobs = []
        self.late_jobs = 0
        self.time = 0
        self.max_steps = settings["max_steps"]
        self._rng = rng

    def _find_job(self, jid):
        """Return the unfinished job with id ``jid``, or None."""
        if not jid:
            return None
        for candidate in self.jobs:
            if candidate.id == jid:
                return candidate
        return None

    def _find_machine(self, mid):
        """Return the machine with id ``mid``, or None."""
        if not mid:
            return None
        for candidate in self.machines:
            if candidate.id == mid:
                return candidate
        return None

    def step(self, action_type, job_id=None, machine_id=None):
        """Apply one action, advance the clock by one tick, and report done.

        Returns True once ``max_steps`` is reached or no unfinished jobs remain.
        Unknown action types and invalid targets are silently ignored.
        """
        # Phase 1: apply the agent's action.
        if action_type == "assign_job":
            picked = self._find_job(job_id)
            target = self._find_machine(machine_id)
            # NOTE(review): only the machine's state is checked here; a job
            # that already has an assigned_machine is accepted again.
            if picked and target and target.status == "idle":
                picked.assigned_machine = target.id
                target.status = "busy"
                target.current_job = picked.id
        elif action_type == "repair":
            target = self._find_machine(machine_id)
            if target and target.status == "broken":
                target.status = "idle"

        # Phase 2: the clock ticks regardless of whether the action applied.
        self.time += 1

        # Phase 3: every busy machine works for one tick, then may break down.
        for machine in self.machines:
            if machine.status != "busy":
                continue

            running = self._find_job(machine.current_job)
            if running:
                running.remaining_time -= 1
                if running.remaining_time <= 0:
                    if self.time > running.deadline:
                        self.late_jobs += 1
                    self.jobs.remove(running)
                    self.completed_jobs.append(running)
                    machine.status = "idle"
                    machine.current_job = None
                    # A machine that just freed itself gets no failure roll.
                    continue

            # The RNG is consulted only for machines that can actually fail.
            if machine.failure_rate > 0 and self._rng.random() < machine.failure_rate:
                machine.status = "broken"
                if running:
                    # Release the interrupted job; it stays in self.jobs.
                    running.assigned_machine = None
                machine.current_job = None

        return self.time >= self.max_steps or not self.jobs
127
 
128
 
129
  # ── Score formula ─────────────────────────────────────────────────────────────
130
 
131
+ def _compute(completed, on_time, total, late):
132
  if total == 0:
133
  return 0.001
134
  score = (
 
139
  return round(max(0.001, min(0.999, score)), 4)
140
 
141
 
142
def _score_env(env):
    """Grade a finished episode of the stdlib-only simulation.

    A completed job counts as on time when its deadline is not earlier than
    the environment's clock at scoring time.
    """
    # NOTE(review): the comparison uses the final clock value, not the tick at
    # which each job completed - confirm this is the intended definition.
    now = env.time
    finished = env.completed_jobs
    n_done = len(finished)
    n_total = n_done + len(env.jobs)
    punctual = 0
    for job in finished:
        if job.deadline >= now:
            punctual += 1
    return _compute(n_done, punctual, n_total, env.late_jobs)
148
+
149
+
150
def _score_obj(obj):
    """Score from a finished FactoryEnv object or state dict.

    Dict input is read through the ``completed_jobs`` / ``pending_jobs`` /
    ``late_jobs`` / ``time`` keys. Any other object is read through attributes
    of the same names, except that unfinished jobs come from ``jobs`` when
    present and from ``pending_jobs`` otherwise. Missing or falsy values are
    treated as empty / zero.
    """
    if isinstance(obj, dict):
        finished = obj.get("completed_jobs", []) or []
        waiting = obj.get("pending_jobs", []) or []
        n_late = int(obj.get("late_jobs", 0) or 0)
        now = int(obj.get("time", 0) or 0)

        def deadline_of(job):
            # Jobs inside a state dict may be dicts or objects.
            if isinstance(job, dict):
                return job.get("deadline", 0)
            return getattr(job, "deadline", 0)
    else:
        finished = list(getattr(obj, "completed_jobs", []) or [])
        waiting = list(
            getattr(obj, "jobs", getattr(obj, "pending_jobs", [])) or []
        )
        n_late = int(getattr(obj, "late_jobs", 0) or 0)
        now = int(getattr(obj, "time", 0) or 0)

        def deadline_of(job):
            return getattr(job, "deadline", 0)

    n_done = len(finished)
    n_total = n_done + len(waiting)
    punctual = len([job for job in finished if deadline_of(job) >= now])
    return _compute(n_done, punctual, n_total, n_late)
174
 
175
 
176
  # ── Heuristic agent ───────────────────────────────────────────────────────────
177
 
178
def _heuristic(machines, jobs):
    """Earliest-deadline-first heuristic.

    Decision order:
      1. If any machine is broken, repair the first one found.
      2. Otherwise give the most urgent *unassigned* job (earliest deadline,
         ties broken by higher priority, then by input order) to the first
         idle machine.
      3. Otherwise wait.

    Args:
        machines: Iterable of objects exposing ``id`` and ``status``.
        jobs: Iterable of unfinished jobs exposing ``id``, ``deadline`` and
            ``priority``. A job whose ``assigned_machine`` attribute is set is
            treated as already running and is skipped; jobs without that
            attribute are always eligible.

    Returns:
        An ``(action_type, job_id, machine_id)`` tuple, where ``action_type``
        is ``"repair"``, ``"assign_job"`` or ``"wait"``.
    """
    first_idle = None
    for machine in machines:
        if machine.status == "broken":
            return "repair", None, machine.id
        if first_idle is None and machine.status == "idle":
            first_idle = machine

    if first_idle is not None:
        # Jobs already running on a machine must not be handed out again:
        # doing so lets one job occupy several machines, and leaves the extra
        # machines stuck "busy" once the job completes and disappears.
        waiting = [
            job for job in jobs
            if getattr(job, "assigned_machine", None) is None
        ]
        if waiting:
            # min() returns the first minimal element, matching a stable sort.
            urgent = min(waiting, key=lambda job: (job.deadline, -job.priority))
            return "assign_job", urgent.id, first_idle.id

    return "wait", None, None
 
188
 
189
 
190
  # ── Episode runner ────────────────────────────────────────────────────────────
191
 
192
def _run_episode(task, seed=42):
    """Run a full heuristic episode and return the graded score.

    The real ``FactoryEnv`` is preferred. If ``factory_env`` cannot be
    imported, or the real episode raises, the stdlib-only ``_Env`` simulation
    is used instead so that a score is still produced. The reason for the
    fallback is logged rather than discarded.

    Args:
        task: Task name forwarded to the environment.
        seed: Seed forwarded to the environment.

    Returns:
        The score from ``_score_obj`` (real environment) or ``_score_env``
        (fallback simulation).
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        from factory_env.env import FactoryEnv
        from factory_env.models import FactoryAction

        env = FactoryEnv(task=task, seed=seed)
        obs = env.reset()
        for _ in range(obs.max_steps):
            if obs.done:
                break
            # Same policy as the fallback path; only the action encoding differs.
            kind, job_id, machine_id = _heuristic(obs.machines, obs.pending_jobs)
            # Pass only the fields the action actually carries, so "wait" and
            # "repair" are built exactly as action_type(+machine_id).
            fields = {"action_type": kind}
            if job_id is not None:
                fields["job_id"] = job_id
            if machine_id is not None:
                fields["machine_id"] = machine_id
            obs = env.step(FactoryAction(**fields))
        return _score_obj(env)
    except ImportError:
        # Expected when the package or its dependencies are not installed.
        log.debug("factory_env is not importable; using the built-in simulation")
    except Exception:
        # Still fall back, but do not hide a genuine failure of the real env.
        log.warning(
            "FactoryEnv episode failed for task %r; using the built-in simulation",
            task,
            exc_info=True,
        )

    # Fallback: pure-Python simulation defined in this module.
    env = _Env(task=task, seed=seed)
    for _ in range(env.max_steps):
        kind, job_id, machine_id = _heuristic(env.machines, env.jobs)
        if env.step(kind, job_id, machine_id):
            break
    return _score_env(env)
 
 
237
 
238
 
239
  # ── Public graders ────────────────────────────────────────────────────────────