Spaces:

voldemort6996
/

rl-bus-optimizer

Running

App Files Files Community

voldemort6996 committed 20 days ago

Commit

9906627

1 Parent(s): 0c86254

Compliance: Fully aligned project with OpenEnv requirements (API, logging, and structure)

Browse files

Files changed (12) hide show

Dockerfile +6 -2
__pycache__/agent.cpython-314.pyc +0 -0
__pycache__/environment.cpython-314.pyc +0 -0
__pycache__/tasks.cpython-314.pyc +0 -0
grader.py +4 -4
inference.py +53 -9
openenv.yaml +5 -4
pyproject.toml +37 -0
requirements.txt +2 -0
server/__init__.py +1 -0
app.py → server/app.py +70 -12
uv.lock +0 -0

Dockerfile CHANGED Viewed

@@ -16,6 +16,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy project
 COPY . .
-# Default: run the Gradio dashboard for Hugging Face Spaces
 EXPOSE 7860
-CMD ["python", "app.py"]

 # Copy project
 COPY . .
+# Ensure the app is served on 0.0.0.0 for Spaces
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV PYTHONPATH="/app"
+# Default: run the Gradio dashboard + OpenEnv API for Hugging Face Spaces
 EXPOSE 7860
+CMD ["python", "server/app.py"]

__pycache__/agent.cpython-314.pyc CHANGED Viewed

Binary files a/__pycache__/agent.cpython-314.pyc and b/__pycache__/agent.cpython-314.pyc differ

__pycache__/environment.cpython-314.pyc CHANGED Viewed

Binary files a/__pycache__/environment.cpython-314.pyc and b/__pycache__/environment.cpython-314.pyc differ

__pycache__/tasks.cpython-314.pyc CHANGED Viewed

Binary files a/__pycache__/tasks.cpython-314.pyc and b/__pycache__/tasks.cpython-314.pyc differ

grader.py CHANGED Viewed

@@ -244,16 +244,16 @@ def main() -> None:
     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
-        print(f"\n{'─' * 50}")
-        print(f"  {tr['task']} ({tr['difficulty']})  —  score: {tr['score']:.4f}")
-        print(f"{'─' * 50}")
         for section in ("rl_agent", "baseline_greedy", "baseline_highest_queue_first", "baseline_random"):
             print(f"  [{section}]")
             for k, v in tr[section].items():
                 print(f"    {k}: {v:.4f}")
     print(f"\n{'=' * 60}")
-    print(f"  Aggregate score (0.0 – 1.0): {report['aggregate_score']:.4f}")
     print(f"  Weights: {report['weights']}")
     print(f"{'=' * 60}")

     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
+        print(f"\n{'-' * 50}")
+        print(f"  {tr['task']} ({tr['difficulty']})  -  score: {tr['score']:.4f}")
+        print(f"{'-' * 50}")
         for section in ("rl_agent", "baseline_greedy", "baseline_highest_queue_first", "baseline_random"):
             print(f"  [{section}]")
             for k, v in tr[section].items():
                 print(f"    {k}: {v:.4f}")
     print(f"\n{'=' * 60}")
+    print(f"  Aggregate score (0.0 - 1.0): {report['aggregate_score']:.4f}")
     print(f"  Weights: {report['weights']}")
     print(f"{'=' * 60}")

inference.py CHANGED Viewed

@@ -44,6 +44,38 @@ from tasks import TASKS, TaskConfig, get_task
 from grader import grade_all_tasks, grade_task_1, grade_task_2, grade_task_3
 # ---------------------------------------------------------------------------
 # Mock LLM agent (deterministic fallback when API is unavailable)
 # ---------------------------------------------------------------------------
@@ -168,7 +200,7 @@ def build_agent(mode: str, model_path: Optional[str] = None) -> Callable[[np.nda
         from agent import DQNAgent
         if model_path is None:
-            model_path = "models/dqn_bus.pt"
         if not os.path.isfile(model_path):
             print(f"[ERROR] DQN model not found at '{model_path}'. Train first with: python train.py")
             sys.exit(1)
@@ -192,28 +224,40 @@ def run_inference(mode: str, model_path: Optional[str], episodes: int) -> Dict:
     """Run inference across all three tasks and return the grade report."""
     agent = build_agent(mode, model_path)
     print(f"\n{'=' * 60}")
-    print("  OpenEnv Bus Routing — Inference")
     print(f"{'=' * 60}")
     print(f"  Mode     : {mode}")
     print(f"  Episodes : {episodes}")
     print(f"{'=' * 60}\n")
     t0 = time.time()
-    # EXACT FORMAT REQUIRED: START/STEP/END logs
-    print("START")
     report = grade_all_tasks(agent, episodes=episodes)
-    print("STEP") # Marked evaluation step
-    print("END")
     elapsed = time.time() - t0
     # Pretty print
     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
-        print(f"{'─' * 55}")
-        print(f"  {tr['task']} ({tr['difficulty']})  →  score: {tr['score']:.4f}")
-        print(f"{'─' * 55}")
         for section in ("rl_agent", "baseline_greedy"):
             print(f"    [{section}]")
             for k, v in tr[section].items():

 from grader import grade_all_tasks, grade_task_1, grade_task_2, grade_task_3
+# ---------------------------------------------------------------------------
+# Strict Structured Logging (Mandatory Hackathon Requirement)
+# ---------------------------------------------------------------------------
def log_start(**kwargs):
    """Print one ``[START]`` record assembled from the keyword arguments.

    Every keyword becomes a ``key=value`` token (the value is rendered with
    normal f-string formatting). Tokens are joined by single spaces in call
    order, and stdout is flushed right away so the line is not held back by
    buffering.
    """
    fields = [f"{name}={value}" for name, value in kwargs.items()]
    line = "[START] " + " ".join(fields)
    print(line, flush=True)
def log_step(**kwargs):
    """Print one ``[STEP]`` record assembled from the keyword arguments.

    Works like the other structured-log helpers, with one extra rule: a value
    of ``None`` is written as the literal text ``null``. All other values are
    rendered with normal f-string formatting and are not otherwise converted.
    """
    tokens = []
    for name, value in kwargs.items():
        # Only None is special-cased; everything else is formatted as-is.
        rendered = "null" if value is None else value
        tokens.append(f"{name}={rendered}")
    print("[STEP] " + " ".join(tokens), flush=True)
def log_end(**kwargs):
    """Print one ``[END]`` record assembled from the keyword arguments.

    Each keyword becomes a ``key=value`` token, joined by single spaces in
    call order, and stdout is flushed immediately.

    Values that are a ``list`` or ``numpy.ndarray`` are written as a JSON
    array so a sequence (e.g. per-task rewards) stays machine-readable on a
    single line. Every other value is written with ``str()``.

    NumPy scalars and nested arrays inside such a sequence are converted to
    native Python values before encoding. Without that, ``json.dumps`` raises
    ``TypeError`` for e.g. ``np.int64`` / ``np.float32`` elements or for a
    multi-dimensional array.
    """
    import json

    def _to_native(obj):
        # json.dumps calls this only for objects it cannot encode itself, so
        # inputs that already serialised correctly produce the same text.
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    payload = []
    for k, v in kwargs.items():
        if isinstance(v, (list, np.ndarray)):
            v_str = json.dumps(list(v), default=_to_native)
        else:
            v_str = str(v)
        payload.append(f"{k}={v_str}")
    vals = " ".join(payload)
    print(f"[END] {vals}", flush=True)
 # ---------------------------------------------------------------------------
 # Mock LLM agent (deterministic fallback when API is unavailable)
 # ---------------------------------------------------------------------------
         from agent import DQNAgent
         if model_path is None:
+            model_path = "models/dqn_bus_v6_best.pt"
         if not os.path.isfile(model_path):
             print(f"[ERROR] DQN model not found at '{model_path}'. Train first with: python train.py")
             sys.exit(1)
     """Run inference across all three tasks and return the grade report."""
     agent = build_agent(mode, model_path)
     print(f"\n{'=' * 60}")
+    print("  OpenEnv Bus Routing - Inference")
     print(f"{'=' * 60}")
     print(f"  Mode     : {mode}")
     print(f"  Episodes : {episodes}")
     print(f"{'=' * 60}\n")
     t0 = time.time()
+    # Strict compliance: report results in structured format
+    log_start(task=mode, env="rl-bus-optimization", model=MODEL_NAME)
+    # We run the report and log its high-level outcome in the END block
+    # Note: the sample script logs every step during a simulation,
+    # but since our grader runs multiple episodes, we will log the aggregate results.
     report = grade_all_tasks(agent, episodes=episodes)
+    # Simplified step log for aggregate progress
+    log_step(step=episodes, action="evaluate_all", reward=report["aggregate_score"], done="true", error="null")
+    log_end(
+        success=bool(report["aggregate_score"] > 0.7),
+        steps=episodes,
+        score=report["aggregate_score"],
+        rewards=[report[t]["score"] for t in ("task_easy", "task_medium", "task_hard")]
+    )
     elapsed = time.time() - t0
     # Pretty print
     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
+        print(f"{'-' * 55}")
+        print(f"  {tr['task']} ({tr['difficulty']})  ->  score: {tr['score']:.4f}")
+        print(f"{'-' * 55}")
         for section in ("rl_agent", "baseline_greedy"):
             print(f"    [{section}]")
             for k, v in tr[section].items():

openenv.yaml CHANGED Viewed

@@ -1,10 +1,11 @@
 name: rl-bus-optimization
 description: >
-  RL-based bus routing environment for optimising passenger service on a
-  circular transit route.  An agent learns to balance passenger wait times,
-  fuel consumption, and stop coverage using Deep Q-Learning.
-version: "1.0.0"
 environment:
   class: environment.BusRoutingEnv

 name: rl-bus-optimization
 description: >
+  A production-grade RL environment for bus route optimization.
+  Features a circular transit route where an agent (Dueling Double DQN)
+  learns to maximize passenger service efficiency while minimizing fuel
+  consumption and wait times. Includes real-world GTFS-demand profiles.
+version: "1.1.0"
 environment:
   class: environment.BusRoutingEnv

pyproject.toml ADDED Viewed

	@@ -0,0 +1,37 @@

+[project]
+name = "rl-bus-optimization"
+version = "1.0.0"
+description = "RL-based bus routing environment for optimising passenger service on a circular transit route."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "numpy>=1.23",
+    "torch>=2.0",
+    "pydantic>=2.0",
+    "openai>=1.0",
+    "pyyaml>=6.0",
+    "gradio>=4.0",
+    "plotly>=5.0",
+    "pandas>=2.0",
+    "openenv-core>=0.2.0",
+]
+[project.scripts]
+server = "server.app:main"
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
[tool.setuptools]
# "server" has to be packaged: the [project.scripts] entry above points at
# server.app:main, which cannot be imported from an installed distribution
# unless the package is included.
packages = ["data", "models", "server"]
# app.py was moved to server/app.py, so it is no longer a top-level module.
py-modules = [
    "agent",
    "environment",
    "grader",
    "inference",
    "llm_evaluator",
    "tasks",
    "train",
]

requirements.txt CHANGED Viewed

@@ -6,3 +6,5 @@ pyyaml>=6.0
 gradio>=4.0
 plotly>=5.0
 pandas>=2.0

 gradio>=4.0
 plotly>=5.0
 pandas>=2.0
+uvicorn>=0.20.0
+openenv-core>=0.2.0

server/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # OpenEnv Server Package

app.py → server/app.py RENAMED Viewed

@@ -4,13 +4,21 @@ import pandas as pd
 import numpy as np
 import time
 import os
 import copy
 from typing import Dict, Any, List, Tuple
-from environment import BusRoutingEnv
-from tasks import get_task
 from agent import DQNAgent
 # ---------------------------------------------------------------------------
 # Training Analytics Helpers
 # ---------------------------------------------------------------------------
@@ -148,10 +156,51 @@ class HeuristicAgent:
 state = SessionState()
 ACTION_MAP = {
-    0: "🚚 MOVE + PICKUP",
-    1: "⏩ MOVE + SKIP",
-    2: "⏸️ WAIT + PICKUP",
 }
 # ---------------------------------------------------------------------------
@@ -533,7 +582,7 @@ with gr.Blocks(title="OpenEnv Bus RL Optimizer") as demo:
         with gr.Column(scale=3):
             gr.HTML("""
             <div class="header-box">
-                <div style="font-size: 3rem; background: rgba(255,255,255,0.1); padding: 5px; border-radius: 50%;">🚌</div>
                 <div>
                     <h1 class="header-title">OPENENV BUS OPTIMIZER</h1>
                     <p style="margin:0; opacity:0.8;">Dueling DDQN + PER | GTFS-Calibrated Demand | Real-Time Urban Logistics RL</p>
@@ -544,12 +593,12 @@ with gr.Blocks(title="OpenEnv Bus RL Optimizer") as demo:
             with gr.Group():
                 gr.HTML("""
                 <div class="info-box">
-                    <b style="color: #2ecc71;">🧠 WHAT THIS DOES:</b><br>
                     <span style="font-size: 0.9rem; opacity: 0.9;">AI optimizes bus routing to reduce wait times and fuel usage.</span><br>
-                    <span class="info-highlight">👉 Click "START AI DEMO" to witness the optimization.</span>
                 </div>
                 """)
-                demo_run_btn = gr.Button("🚀 START AI DEMO (Auto Simulation)", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=1):
@@ -572,8 +621,8 @@ with gr.Blocks(title="OpenEnv Bus RL Optimizer") as demo:
         with gr.Column(scale=3):
             plot_area = gr.Plot(label="Live Simulation Feed")
             with gr.Row():
-                step_btn = gr.Button("⏭️ SINGLE STEP (Manual)", scale=1)
-                inner_run_btn = gr.Button("⏩ RUN 10 STEPS", variant="secondary", scale=1)
             with gr.Row():
                 with gr.Column(scale=2):
@@ -633,5 +682,14 @@ with gr.Blocks(title="OpenEnv Bus RL Optimizer") as demo:
     </div>
     """)
 if __name__ == "__main__":
-    demo.launch(theme=gr.themes.Soft(), css=CSS, server_name="0.0.0.0", server_port=7860)

 import numpy as np
 import time
 import os
+import sys
 import copy
 from typing import Dict, Any, List, Tuple
+# Ensure root directory is in path for imports
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from environment import BusRoutingEnv, Observation, Action, Reward
+from tasks import get_task, TASK_MEDIUM
 from agent import DQNAgent
+from fastapi import FastAPI, Body, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
 # ---------------------------------------------------------------------------
 # Training Analytics Helpers
 # ---------------------------------------------------------------------------
 state = SessionState()
+# --- OpenEnv API Implementation (for Automated Validators) ---
+api_app = FastAPI(title="OpenEnv Bus RL API")
+api_app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Shared background environment for API calls
+api_env = TASK_MEDIUM.build_env()
@api_app.post("/reset")
async def api_reset():
    """Handle ``POST /reset``: reset the shared API environment.

    Calls ``reset()`` on the module-level ``api_env`` and returns the
    resulting observation converted with ``model_dump()``.
    """
    # presumably a pydantic model (model_dump is called on it) — confirm in environment.py
    return api_env.reset().model_dump()
@api_app.post("/step")
async def api_step(action_req: Dict[str, int] = Body(...)):
    """Handle ``POST /step``: advance the shared API environment by one step.

    The JSON body is read as a mapping; its ``"action"`` entry is forwarded
    to ``api_env.step``. A body without an ``"action"`` key falls back to
    action ``0`` instead of being rejected.

    Returns a dict with the keys ``observation``, ``reward``, ``done`` and
    ``info``.
    """
    # Automated validators are expected to post {"action": X}.
    chosen_action = action_req.get("action", 0)
    observation, step_reward, finished, details = api_env.step(chosen_action)

    response = {}
    response["observation"] = observation.model_dump()
    response["reward"] = step_reward.model_dump()
    # Coerce to a plain bool so the flag is always JSON-encodable.
    response["done"] = bool(finished)
    response["info"] = details
    return response
@api_app.get("/state")
async def api_state():
    """Handle ``GET /state``: return whatever ``api_env.state()`` reports."""
    snapshot = api_env.state()
    return snapshot
@api_app.get("/health")
async def health():
    """Handle ``GET /health``: return a fixed status payload naming the environment."""
    payload = dict(status="healthy", env="rl-bus-optimization")
    return payload
+# --- Gradio UI Mapping ---
 ACTION_MAP = {
+    0: "MOVE + PICKUP",
+    1: "MOVE + SKIP",
+    2: "WAIT + PICKUP",
 }
 # ---------------------------------------------------------------------------
         with gr.Column(scale=3):
             gr.HTML("""
             <div class="header-box">
+                <div style="font-size: 3rem; background: rgba(255,255,255,0.1); padding: 5px; border-radius: 50%;">BUS</div>
                 <div>
                     <h1 class="header-title">OPENENV BUS OPTIMIZER</h1>
                     <p style="margin:0; opacity:0.8;">Dueling DDQN + PER | GTFS-Calibrated Demand | Real-Time Urban Logistics RL</p>
             with gr.Group():
                 gr.HTML("""
                 <div class="info-box">
+                    <b style="color: #2ecc71;">WHAT THIS DOES:</b><br>
                     <span style="font-size: 0.9rem; opacity: 0.9;">AI optimizes bus routing to reduce wait times and fuel usage.</span><br>
+                    <span class="info-highlight">Click 'START AI DEMO' to witness the optimization.</span>
                 </div>
                 """)
+                demo_run_btn = gr.Button("START AI DEMO (Auto Simulation)", variant="primary", size="lg")
     with gr.Row():
         with gr.Column(scale=1):
         with gr.Column(scale=3):
             plot_area = gr.Plot(label="Live Simulation Feed")
             with gr.Row():
+                step_btn = gr.Button("SINGLE STEP (Manual)", scale=1)
+                inner_run_btn = gr.Button("RUN 10 STEPS", variant="secondary", scale=1)
             with gr.Row():
                 with gr.Column(scale=2):
     </div>
     """)
def main():
    """Serve the OpenEnv API and the Gradio dashboard from a single process.

    Mounts the ``demo`` Blocks app at ``/`` on the FastAPI ``api_app`` and
    runs the combined ASGI app with uvicorn on ``0.0.0.0:7860``.
    """
    import gradio as gr

    # NOTE(review): the earlier `demo.launch(...)` entry point passed
    # `theme=` and `css=`; nothing here forwards them, so confirm the
    # dashboard styling is still applied.
    combined_app = gr.mount_gradio_app(api_app, demo, path="/")

    print("Starting OpenEnv Server + Dashboard on http://0.0.0.0:7860")
    uvicorn.run(
        combined_app,
        host="0.0.0.0",
        port=7860,
        log_level="info",
    )
 if __name__ == "__main__":
+    main()

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff