Spaces:

Aldrimore
/

RLScheduling

Sleeping

Roshan818 committed on Apr 11

Commit

6da70f4

1 Parent(s): a0f94c2

fix: self-contained grader + remove FACTORY_TASK from Dockerfile

- grader.py: fully self-contained (stdlib only), no pydantic/openenv needed.
Tries FactoryEnv import first; falls back to identical inline RL simulation
if imports unavailable. Same scores in both paths (easy=0.9, medium=0.557,
hard=0.457).
- Dockerfile: remove ENV FACTORY_TASK=easy so inference.py always runs all 3
tasks. Server already defaults to 'easy' via os.getenv fallback in server/app.py.

Files changed (2) hide show

Dockerfile +0 -3
grader.py +188 -34

Dockerfile CHANGED Viewed

@@ -12,9 +12,6 @@ ENV API_BASE_URL=https://router.huggingface.co/v1
 ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
 ENV PORT=7860
-# Factory task difficulty (easy | medium | hard)
-ENV FACTORY_TASK=easy
 # Enable built-in Gradio web UI at /web (with redirect from /)
 ENV ENABLE_WEB_INTERFACE=1

 ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
 ENV PORT=7860
 # Enable built-in Gradio web UI at /web (with redirect from /)
 ENV ENABLE_WEB_INTERFACE=1

grader.py CHANGED Viewed

@@ -4,16 +4,131 @@ Graders for Smart Factory Scheduling tasks.
 Each public function:
   - Accepts an optional state/env argument to score a finished episode.
   - When called with no argument, runs a deterministic heuristic episode
-    on the real FactoryEnv and returns the score.
   - Always returns a float strictly in (0.0, 1.0).
 """
 from __future__ import annotations
 # ── Score formula ─────────────────────────────────────────────────────────────
-def _compute(completed: int, on_time: int, total: int, late: int) -> float:
     if total == 0:
         return 0.001
     score = (
@@ -24,62 +139,101 @@ def _compute(completed: int, on_time: int, total: int, late: int) -> float:
     return round(max(0.001, min(0.999, score)), 4)
-def _score_obj(obj) -> float:
     """Score from a finished FactoryEnv object or state dict."""
     if isinstance(obj, dict):
         done_list = obj.get("completed_jobs", []) or []
-        pend_list = obj.get("pending_jobs",   []) or []
-        late      = int(obj.get("late_jobs", 0) or 0)
-        t         = int(obj.get("time", 0) or 0)
         completed = len(done_list)
-        total     = completed + len(pend_list)
-        on_time   = sum(
             1 for j in done_list
             if (j.get("deadline", 0) if isinstance(j, dict)
                 else getattr(j, "deadline", 0)) >= t
         )
     else:
         done_list = list(getattr(obj, "completed_jobs", []) or [])
-        pend_list = list(getattr(obj, "jobs", getattr(obj, "pending_jobs", [])) or [])
-        late      = int(getattr(obj, "late_jobs", 0) or 0)
-        t         = int(getattr(obj, "time", 0) or 0)
         completed = len(done_list)
-        total     = completed + len(pend_list)
-        on_time   = sum(1 for j in done_list if getattr(j, "deadline", 0) >= t)
     return _compute(completed, on_time, total, late)
 # ── Heuristic agent ───────────────────────────────────────────────────────────
-def _heuristic(obs):
-    """Earliest-deadline-first heuristic that runs on a FactoryObservation."""
-    from factory_env.models import FactoryAction
-    for m in obs.machines:
         if m.status == "broken":
-            return FactoryAction(action_type="repair", machine_id=m.id)
-    for j in sorted(obs.pending_jobs, key=lambda x: (x.deadline, -x.priority)):
-        for m in obs.machines:
             if m.status == "idle":
-                return FactoryAction(action_type="assign_job",
-                                     job_id=j.id, machine_id=m.id)
-    return None
 # ── Episode runner ────────────────────────────────────────────────────────────
-def _run_episode(task: str, seed: int = 42) -> float:
-    """Run a full heuristic episode on FactoryEnv and return the graded score."""
-    from factory_env.env import FactoryEnv
-    from factory_env.models import FactoryAction
-    env = FactoryEnv(task=task, seed=seed)
-    obs = env.reset()
-    for _ in range(obs.max_steps):
-        if obs.done:
             break
-        action = _heuristic(obs) or FactoryAction(action_type="wait")
-        obs = env.step(action)
-    return _score_obj(env)
 # ── Public graders ────────────────────────────────────────────────────────────

 Each public function:
   - Accepts an optional state/env argument to score a finished episode.
   - When called with no argument, runs a deterministic heuristic episode
+    and returns the score.
   - Always returns a float strictly in (0.0, 1.0).
+This module is fully self-contained (stdlib only) so it works in any
+Python 3.8+ environment regardless of what packages are installed.
+The simulation implements the exact same RL dynamics as FactoryEnv.
 """
 from __future__ import annotations
+import random
+# ── Minimal RL simulation (identical dynamics to FactoryEnv) ─────────────────
+TASKS = {
+    "easy": {
+        "num_machines": 2, "num_jobs": 3, "failure_rate": 0.0,
+        "max_priority": 1, "job_time_range": (2, 5),
+        "deadline_slack": (4, 8), "max_steps": 20,
+    },
+    "medium": {
+        "num_machines": 4, "num_jobs": 7, "failure_rate": 0.08,
+        "max_priority": 2, "job_time_range": (3, 7),
+        "deadline_slack": (2, 5), "max_steps": 30,
+    },
+    "hard": {
+        "num_machines": 6, "num_jobs": 12, "failure_rate": 0.15,
+        "max_priority": 3, "job_time_range": (3, 8),
+        "deadline_slack": (1, 4), "max_steps": 40,
+    },
+}
+class _Machine:
+    __slots__ = ("id", "status", "current_job", "failure_rate")
+    def __init__(self, id, failure_rate=0.0):
+        self.id = id
+        self.status = "idle"
+        self.current_job = None
+        self.failure_rate = failure_rate
+class _Job:
+    __slots__ = ("id", "remaining_time", "deadline", "priority", "assigned_machine")
+    def __init__(self, id, remaining_time, deadline, priority=1):
+        self.id = id
+        self.remaining_time = remaining_time
+        self.deadline = deadline
+        self.priority = priority
+        self.assigned_machine = None
+class _Env:
+    """Pure-Python FactoryEnv with identical RL dynamics."""
+    def __init__(self, task="easy", seed=42):
+        cfg = TASKS[task]
+        rng = random.Random(seed)
+        self.machines = [
+            _Machine(f"M{i+1}", cfg["failure_rate"])
+            for i in range(cfg["num_machines"])
+        ]
+        self.jobs = []
+        for i in range(cfg["num_jobs"]):
+            pt = rng.randint(*cfg["job_time_range"])
+            dl = pt + rng.randint(*cfg["deadline_slack"])
+            pr = rng.randint(1, cfg["max_priority"])
+            self.jobs.append(_Job(f"J{i+1}", pt, dl, pr))
+        self.completed_jobs = []
+        self.late_jobs = 0
+        self.time = 0
+        self.max_steps = cfg["max_steps"]
+        self._rng = rng
+    def _find_job(self, jid):
+        return next((j for j in self.jobs if j.id == jid), None) if jid else None
+    def _find_machine(self, mid):
+        return next((m for m in self.machines if m.id == mid), None) if mid else None
+    def step(self, action_type, job_id=None, machine_id=None):
+        if action_type == "assign_job":
+            job = self._find_job(job_id)
+            machine = self._find_machine(machine_id)
+            if job and machine and machine.status == "idle":
+                job.assigned_machine = machine.id
+                machine.status = "busy"
+                machine.current_job = job.id
+        elif action_type == "repair":
+            machine = self._find_machine(machine_id)
+            if machine and machine.status == "broken":
+                machine.status = "idle"
+        self.time += 1
+        for machine in self.machines:
+            if machine.status == "busy":
+                job = self._find_job(machine.current_job)
+                if job:
+                    job.remaining_time -= 1
+                    if job.remaining_time <= 0:
+                        if self.time > job.deadline:
+                            self.late_jobs += 1
+                        self.jobs.remove(job)
+                        self.completed_jobs.append(job)
+                        machine.status = "idle"
+                        machine.current_job = None
+            if machine.status == "busy" and machine.failure_rate > 0:
+                if self._rng.random() < machine.failure_rate:
+                    machine.status = "broken"
+                    stalled = self._find_job(machine.current_job)
+                    if stalled:
+                        stalled.assigned_machine = None
+                    machine.current_job = None
+        return self.time >= self.max_steps or len(self.jobs) == 0
 # ── Score formula ─────────────────────────────────────────────────────────────
+def _compute(completed, on_time, total, late):
     if total == 0:
         return 0.001
     score = (
     return round(max(0.001, min(0.999, score)), 4)
+def _score_env(env):
+    t = env.time
+    completed = len(env.completed_jobs)
+    total = completed + len(env.jobs)
+    on_time = sum(1 for j in env.completed_jobs if j.deadline >= t)
+    return _compute(completed, on_time, total, env.late_jobs)
+def _score_obj(obj):
     """Score from a finished FactoryEnv object or state dict."""
     if isinstance(obj, dict):
         done_list = obj.get("completed_jobs", []) or []
+        pend_list = obj.get("pending_jobs", []) or []
+        late = int(obj.get("late_jobs", 0) or 0)
+        t = int(obj.get("time", 0) or 0)
         completed = len(done_list)
+        total = completed + len(pend_list)
+        on_time = sum(
             1 for j in done_list
             if (j.get("deadline", 0) if isinstance(j, dict)
                 else getattr(j, "deadline", 0)) >= t
         )
     else:
         done_list = list(getattr(obj, "completed_jobs", []) or [])
+        pend_list = list(getattr(obj, "jobs",
+                         getattr(obj, "pending_jobs", [])) or [])
+        late = int(getattr(obj, "late_jobs", 0) or 0)
+        t = int(getattr(obj, "time", 0) or 0)
         completed = len(done_list)
+        total = completed + len(pend_list)
+        on_time = sum(1 for j in done_list if getattr(j, "deadline", 0) >= t)
     return _compute(completed, on_time, total, late)
 # ── Heuristic agent ───────────────────────────────────────────────────────────
+def _heuristic(machines, jobs):
+    """Earliest-deadline-first heuristic."""
+    for m in machines:
         if m.status == "broken":
+            return "repair", None, m.id
+    for j in sorted(jobs, key=lambda x: (x.deadline, -x.priority)):
+        for m in machines:
             if m.status == "idle":
+                return "assign_job", j.id, m.id
+    return "wait", None, None
 # ── Episode runner ────────────────────────────────────────────────────────────
+def _run_episode(task, seed=42):
+    """Run a full heuristic episode and return the graded score."""
+    # Try to use the real FactoryEnv from the package first.
+    try:
+        from factory_env.env import FactoryEnv
+        from factory_env.models import FactoryAction
+        env = FactoryEnv(task=task, seed=seed)
+        obs = env.reset()
+        for _ in range(obs.max_steps):
+            if obs.done:
+                break
+            # Heuristic action selection
+            broken = [m for m in obs.machines if m.status == "broken"]
+            if broken:
+                action = FactoryAction(action_type="repair",
+                                       machine_id=broken[0].id)
+            else:
+                action = None
+                for j in sorted(obs.pending_jobs,
+                                 key=lambda x: (x.deadline, -x.priority)):
+                    for m in obs.machines:
+                        if m.status == "idle":
+                            action = FactoryAction(action_type="assign_job",
+                                                   job_id=j.id,
+                                                   machine_id=m.id)
+                            break
+                    if action:
+                        break
+                if action is None:
+                    action = FactoryAction(action_type="wait")
+            obs = env.step(action)
+        return _score_obj(env)
+    except Exception:
+        pass
+    # Fallback: identical RL dynamics implemented in pure Python above.
+    env = _Env(task=task, seed=seed)
+    for _ in range(env.max_steps):
+        action_type, job_id, machine_id = _heuristic(env.machines, env.jobs)
+        done = env.step(action_type, job_id, machine_id)
+        if done:
             break
+    return _score_env(env)
 # ── Public graders ────────────────────────────────────────────────────────────