Spaces:

Aldrimore
/

RLScheduling

Sleeping

Roshan818 committed on Apr 11

Commit

a0f94c2

1 Parent(s): b77936a

fix: lazy openenv.core imports so FactoryEnv works on all Python envs

- factory_env/env.py: wrap openenv.core Environment import in try/except
- factory_env/models.py: lazy imports for Action/Observation/State base classes,
add explicit done/reward fields to FactoryObservation for fallback case
- grader.py: clean FactoryEnv-only grader, no pure-Python fallback

Files changed (3) hide show

factory_env/env.py +8 -2
factory_env/models.py +13 -2
grader.py +18 -102

factory_env/env.py CHANGED Viewed

@@ -1,13 +1,19 @@
 import random
 from typing import List, Optional
-from openenv.core import Environment
 from factory_env.models import FactoryAction, FactoryObservation, FactoryState, Machine, Job
 from factory_env.tasks import TASKS
-class FactoryEnv(Environment[FactoryAction, FactoryObservation, FactoryState]):
     """Smart Factory Scheduling Environment — OpenEnv compliant."""
     SUPPORTS_CONCURRENT_SESSIONS = True

 import random
 from typing import List, Optional
+# Optional base class: inherit from openenv.core's Environment when it can be
+# imported, and fall back to plain `object` otherwise. This lets FactoryEnv be
+# imported (e.g. by the grader) even in minimal environments where
+# openenv-core's gradio/PIL import chain fails to load.
+try:
+    from openenv.core import Environment as _EnvBase
+except Exception:
+    _EnvBase = object  # type: ignore[assignment,misc]
 from factory_env.models import FactoryAction, FactoryObservation, FactoryState, Machine, Job
 from factory_env.tasks import TASKS
+class FactoryEnv(_EnvBase):
     """Smart Factory Scheduling Environment — OpenEnv compliant."""
     SUPPORTS_CONCURRENT_SESSIONS = True

factory_env/models.py CHANGED Viewed

@@ -1,6 +1,14 @@
 from typing import List, Optional
 from pydantic import BaseModel, ConfigDict, Field
-from openenv.core import Action as BaseAction, Observation as BaseObservation, State as BaseState
 class Machine(BaseModel):
@@ -32,7 +40,10 @@ class FactoryAction(BaseAction):
 class FactoryObservation(BaseObservation):
-    """Inherits: done (bool), reward (float|None), metadata (dict)"""
     machines: List[Machine] = Field(default_factory=list)
     pending_jobs: List[Job] = Field(default_factory=list)
     completed_jobs: List[Job] = Field(default_factory=list)

 from typing import List, Optional
 from pydantic import BaseModel, ConfigDict, Field
+# Lazy openenv base classes — fall back to pydantic BaseModel when the
+# openenv.core import chain (which pulls in gradio/PIL) is unavailable.
+try:
+    from openenv.core import Action as BaseAction, Observation as BaseObservation, State as BaseState
+except Exception:
+    BaseAction = BaseModel       # type: ignore[assignment,misc]
+    BaseObservation = BaseModel  # type: ignore[assignment,misc]
+    BaseState = BaseModel        # type: ignore[assignment,misc]
 class Machine(BaseModel):
 class FactoryObservation(BaseObservation):
+    """Inherits done/reward/metadata from openenv base when available;
+    defined here explicitly so the class works when falling back to BaseModel."""
+    done: bool = False
+    reward: Optional[float] = None
     machines: List[Machine] = Field(default_factory=list)
     pending_jobs: List[Job] = Field(default_factory=list)
     completed_jobs: List[Job] = Field(default_factory=list)

grader.py CHANGED Viewed

@@ -1,25 +1,17 @@
 """
 Graders for Smart Factory Scheduling tasks.
-Primary path: imports FactoryEnv, runs a full deterministic heuristic episode,
-and scores the result using the real environment state.
-Fallback path (if factory_env is unavailable): a minimal pure-Python
-simulation is used so the validator can still load and call these functions.
-All three public functions:
-  - Accept an optional state/env argument for scoring a finished episode.
-  - When called with no argument, run their own deterministic episode.
-  - Always return a float strictly in (0.0, 1.0).
 """
 from __future__ import annotations
-import random
-from typing import Any, List, Optional
-# ── Score formula (shared by both paths) ─────────────────────────────────────
 def _compute(completed: int, on_time: int, total: int, late: int) -> float:
     if total == 0:
@@ -32,8 +24,8 @@ def _compute(completed: int, on_time: int, total: int, late: int) -> float:
     return round(max(0.001, min(0.999, score)), 4)
-def _score_obj(obj: Any) -> float:
-    """Score from a finished env object or state dict."""
     if isinstance(obj, dict):
         done_list = obj.get("completed_jobs", []) or []
         pend_list = obj.get("pending_jobs",   []) or []
@@ -53,17 +45,14 @@ def _score_obj(obj: Any) -> float:
         t         = int(getattr(obj, "time", 0) or 0)
         completed = len(done_list)
         total     = completed + len(pend_list)
-        on_time   = sum(
-            1 for j in done_list
-            if getattr(j, "deadline", 0) >= t
-        )
     return _compute(completed, on_time, total, late)
-# ── Primary path: use the real FactoryEnv ────────────────────────────────────
 def _heuristic(obs):
-    """Earliest-deadline-first heuristic action (works on FactoryObservation)."""
     from factory_env.models import FactoryAction
     for m in obs.machines:
         if m.status == "broken":
@@ -76,8 +65,10 @@ def _heuristic(obs):
     return None
-def _run_factory_episode(task: str, seed: int = 42) -> float:
-    """Run a full heuristic episode on the real FactoryEnv and return score."""
     from factory_env.env import FactoryEnv
     from factory_env.models import FactoryAction
@@ -91,81 +82,6 @@ def _run_factory_episode(task: str, seed: int = 42) -> float:
     return _score_obj(env)
-# ── Fallback path: pure-Python mini-simulation ───────────────────────────────
-_TASK_CFG = {
-    "easy":   dict(nm=2, nj=3,  fr=0.00, ms=20, jtr=(2,4), ds=(2,5), mp=1),
-    "medium": dict(nm=4, nj=7,  fr=0.08, ms=30, jtr=(2,5), ds=(2,6), mp=2),
-    "hard":   dict(nm=6, nj=12, fr=0.15, ms=40, jtr=(2,6), ds=(1,5), mp=3),
-}
-def _run_mini_episode(task: str, seed: int = 42) -> float:
-    """Pure-Python fallback simulation (no external deps)."""
-    cfg = _TASK_CFG[task]
-    rng = random.Random(seed)
-    machines = [{"id": f"M{i+1}", "status": "idle", "job": None,
-                 "fr": cfg["fr"]} for i in range(cfg["nm"])]
-    jobs = []
-    for i in range(cfg["nj"]):
-        pt = rng.randint(*cfg["jtr"])
-        dl = pt + rng.randint(*cfg["ds"])
-        jobs.append({"id": f"J{i+1}", "rt": pt, "dl": dl,
-                     "pr": rng.randint(1, cfg["mp"])})
-    completed, late, t = [], 0, 0
-    for _ in range(cfg["ms"]):
-        if not jobs:
-            break
-        # repair broken machines
-        for m in machines:
-            if m["status"] == "broken":
-                m["status"] = "idle"
-                break
-        # assign jobs EDF
-        for j in sorted(jobs, key=lambda x: (x["dl"], -x["pr"])):
-            for m in machines:
-                if m["status"] == "idle":
-                    m["status"] = "busy"
-                    m["job"]    = j["id"]
-                    j["m"]      = m["id"]
-                    break
-        t += 1
-        for m in machines:
-            if m["status"] == "busy":
-                j = next((x for x in jobs if x["id"] == m["job"]), None)
-                if j:
-                    j["rt"] -= 1
-                    if j["rt"] <= 0:
-                        if t > j["dl"]:
-                            late += 1
-                        completed.append(j)
-                        jobs.remove(j)
-                        m["status"] = "idle"
-                        m["job"]    = None
-            if m["status"] == "busy" and cfg["fr"] > 0:
-                if rng.random() < cfg["fr"]:
-                    m["status"] = "broken"
-                    m["job"]    = None
-    total   = len(completed) + len(jobs)
-    n       = len(completed)
-    on_time = max(0, n - late)
-    return _compute(n, on_time, total, late)
-# ── Episode runner (tries FactoryEnv, falls back if unavailable) ─────────────
-def _episode(task: str) -> float:
-    try:
-        return _run_factory_episode(task)
-    except Exception:
-        return _run_mini_episode(task)
 # ── Public graders ────────────────────────────────────────────────────────────
 def score_easy(state_or_env=None) -> float:
@@ -173,7 +89,7 @@ def score_easy(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
-    return _episode("easy")
 def score_medium(state_or_env=None) -> float:
@@ -181,7 +97,7 @@ def score_medium(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
-    return _episode("medium")
 def score_hard(state_or_env=None) -> float:
@@ -189,4 +105,4 @@ def score_hard(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
-    return _episode("hard")

 """
 Graders for Smart Factory Scheduling tasks.
+Each public function:
+  - Accepts an optional state/env argument to score a finished episode.
+  - When called with no argument, runs a deterministic heuristic episode
+    on the real FactoryEnv and returns the score.
+  - Always returns a float strictly in (0.0, 1.0).
 """
 from __future__ import annotations
+# ── Score formula ─────────────────────────────────────────────────────────────
 def _compute(completed: int, on_time: int, total: int, late: int) -> float:
     if total == 0:
     return round(max(0.001, min(0.999, score)), 4)
+def _score_obj(obj) -> float:
+    """Score from a finished FactoryEnv object or state dict."""
     if isinstance(obj, dict):
         done_list = obj.get("completed_jobs", []) or []
         pend_list = obj.get("pending_jobs",   []) or []
         t         = int(getattr(obj, "time", 0) or 0)
         completed = len(done_list)
         total     = completed + len(pend_list)
+        on_time   = sum(1 for j in done_list if getattr(j, "deadline", 0) >= t)
     return _compute(completed, on_time, total, late)
+# ── Heuristic agent ───────────────────────────────────────────────────────────
 def _heuristic(obs):
+    """Earliest-deadline-first heuristic that runs on a FactoryObservation."""
     from factory_env.models import FactoryAction
     for m in obs.machines:
         if m.status == "broken":
     return None
+# ── Episode runner ────────────────────────────────────────────────────────────
+def _run_episode(task: str, seed: int = 42) -> float:
+    """Run a full heuristic episode on FactoryEnv and return the graded score."""
     from factory_env.env import FactoryEnv
     from factory_env.models import FactoryAction
     return _score_obj(env)
 # ── Public graders ────────────────────────────────────────────────────────────
 def score_easy(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
+    return _run_episode("easy")
 def score_medium(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
+    return _run_episode("medium")
 def score_hard(state_or_env=None) -> float:
     Returns float in (0.0, 1.0)."""
     if state_or_env is not None:
         return _score_obj(state_or_env)
+    return _run_episode("hard")