Spaces:

anonymousDevil
/

cognitive-load-manager

Sleeping

App Files Community

Shreeraj Mummidivarapu committed on Apr 12

Commit

0c20f33

unverified ·

1 Parent(s): 0a53d38

Eswar Ki Krupa !!

Browse files

Files changed (1) hide show

inference.py +36 -32

inference.py CHANGED Viewed

@@ -1,14 +1,14 @@
 #!/usr/bin/env python3
 """
 inference.py — LLM Agent for Cognitive Load Manager
-Runs the CLM environment locally (no HTTP) so LLM calls are ALWAYS made.
-Mirrors the my_env pattern that passed Phase 2 validation.
 """
 import os
 import sys
 import json
-from typing import List, Optional, Dict, Any, Tuple
 # ── Load .env for local development ──────────────────────────────────────────
 try:
@@ -22,20 +22,18 @@ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME   = os.getenv("MODEL_NAME",   "Qwen/Qwen2.5-72B-Instruct")
 API_KEY      = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
-BENCHMARK = "cognitive-load-manager"
-TASK_NAME = "schedule-optimization"
 SUCCESS_SCORE_THRESHOLD = 0.5
-MAX_STEPS = 50
-# ── OpenAI client — always built, always used, no gating ─────────────────────
 from openai import OpenAI
 client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY or "missing")
-# ── Import CLM environment directly (no HTTP, guaranteed to work) ─────────────
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-from models import (
-    Action, CLMEnvironment, generate_tasks, deterministic_grader
-)
 # ── Logging ───────────────────────────────────────────────────────────────────
 def log_start(task: str, env: str, model: str) -> None:
@@ -56,7 +54,7 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
         flush=True,
     )
-# ── LLM action — ALWAYS called, never gated on key presence ──────────────────
 def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]:
     history_str = "\n".join(history[-5:]) if history else "No previous actions."
@@ -72,9 +70,7 @@ def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]
         "STRATEGY:\n"
         "1. If fatigue_level is 'high' OR stress_warning is true → "
         '{"type": "break", "task_id": null}\n'
-        "2. If fatigue_level is 'medium' → work on earliest deadline incomplete task\n"
-        "3. Otherwise → work on earliest deadline incomplete task\n"
-        "4. Pick incomplete tasks (progress < 1.0) with the earliest deadline first.\n"
     )
     user_prompt = (
@@ -83,7 +79,7 @@ def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]
         "What is your next action JSON?"
     )
-    # Always attempt LLM call — this is what registers on the proxy
     completion = client.chat.completions.create(
         model=MODEL_NAME,
         messages=[
@@ -96,10 +92,9 @@ def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]
     text = (completion.choices[0].message.content or "").strip()
     # Strip markdown fences
-    if text.startswith("```json"):
-        text = text[7:]
-    if text.startswith("```"):
-        text = text[3:]
     if text.endswith("```"):
         text = text[:-3]
     text = text.strip()
@@ -112,7 +107,7 @@ def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]
 def heuristic_action(observation_dict: dict) -> Dict:
-    """Fallback used only when LLM response is unparseable."""
     tasks  = observation_dict.get("tasks", [])
     incomp = [t for t in tasks if t.get("progress", 0.0) < 1.0]
     fs     = observation_dict.get("visible_state", {})
@@ -123,7 +118,7 @@ def heuristic_action(observation_dict: dict) -> Dict:
     return {"type": "delay", "task_id": None}
-# ── Main task runner ──────────────────────────────────────────────────────────
 def run_task(level: str) -> float:
     log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
@@ -149,28 +144,26 @@ def run_task(level: str) -> float:
         action_dict: Optional[Dict] = None
         error_msg: Optional[str]    = None
-        # Always call LLM — never skip it
         try:
             action_dict = get_llm_action(observation_dict, history)
         except Exception as ex:
             error_msg = str(ex)[:80]
-        # Only fall back to heuristic if LLM response was unparseable
         if not action_dict:
             action_dict = heuristic_action(observation_dict)
-        # Validate action type
         valid_types = {"work", "break", "switch", "delay"}
         if action_dict.get("type") not in valid_types:
             action_dict = {"type": "delay", "task_id": None}
         action_str = json.dumps(action_dict, separators=(",", ":"))
-        # Step the local environment
         try:
-            action     = Action(type=action_dict["type"], task_id=action_dict.get("task_id"))
             obs, reward, done, info = env.step(action)
-            reward = float(reward)
         except Exception as ex:
             reward    = 0.01
             done      = True
@@ -178,10 +171,8 @@ def run_task(level: str) -> float:
         rewards.append(reward)
         history.append(f"Step {step}: {action_str} -> reward={reward:.2f}")
         log_step(step=step, action=action_str, reward=reward, done=done, error=error_msg)
-    # Final score
     score = float(info.get("final_score", 0.0))
     if score == 0.0:
         score = deterministic_grader(env.state.tasks, env.state.time_step, env.state.energy)
@@ -192,9 +183,22 @@ def run_task(level: str) -> float:
     return score
 def main():
-    level = os.getenv("CLM_LEVEL", "hard")
-    run_task(level)
 if __name__ == "__main__":

 #!/usr/bin/env python3
 """
 inference.py — LLM Agent for Cognitive Load Manager
+Runs ALL 3 tasks (easy, medium, hard) so the validator sees 3 graded tasks.
+Imports CLM environment locally — guaranteed LLM calls on every step.
 """
 import os
 import sys
 import json
+from typing import List, Optional, Dict
 # ── Load .env for local development ──────────────────────────────────────────
 try:
 MODEL_NAME   = os.getenv("MODEL_NAME",   "Qwen/Qwen2.5-72B-Instruct")
 API_KEY      = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+BENCHMARK             = "cognitive-load-manager"
+TASK_NAME             = "schedule-optimization"
 SUCCESS_SCORE_THRESHOLD = 0.5
+MAX_STEPS             = 50
+# ── OpenAI client — always built, always used, never gated ───────────────────
 from openai import OpenAI
 client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY or "missing")
+# ── Import CLM environment directly (no HTTP — always works) ──────────────────
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from models import Action, CLMEnvironment, generate_tasks, deterministic_grader
 # ── Logging ───────────────────────────────────────────────────────────────────
 def log_start(task: str, env: str, model: str) -> None:
         flush=True,
     )
+# ── LLM action — ALWAYS called, never gated ──────────────────────────────────
 def get_llm_action(observation_dict: dict, history: List[str]) -> Optional[Dict]:
     history_str = "\n".join(history[-5:]) if history else "No previous actions."
         "STRATEGY:\n"
         "1. If fatigue_level is 'high' OR stress_warning is true → "
         '{"type": "break", "task_id": null}\n'
+        "2. Otherwise → work on the incomplete task with the earliest deadline.\n"
     )
     user_prompt = (
         "What is your next action JSON?"
     )
+    # Always attempt LLM call — registers on the proxy
     completion = client.chat.completions.create(
         model=MODEL_NAME,
         messages=[
     text = (completion.choices[0].message.content or "").strip()
     # Strip markdown fences
+    for fence in ("```json", "```"):
+        if text.startswith(fence):
+            text = text[len(fence):]
     if text.endswith("```"):
         text = text[:-3]
     text = text.strip()
 def heuristic_action(observation_dict: dict) -> Dict:
+    """Fallback used ONLY when LLM response is unparseable."""
     tasks  = observation_dict.get("tasks", [])
     incomp = [t for t in tasks if t.get("progress", 0.0) < 1.0]
     fs     = observation_dict.get("visible_state", {})
     return {"type": "delay", "task_id": None}
+# ── Single task runner ────────────────────────────────────────────────────────
 def run_task(level: str) -> float:
     log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
         action_dict: Optional[Dict] = None
         error_msg: Optional[str]    = None
+        # Always call LLM — never skip
         try:
             action_dict = get_llm_action(observation_dict, history)
         except Exception as ex:
             error_msg = str(ex)[:80]
+        # Heuristic fallback only if LLM response is unparseable
         if not action_dict:
             action_dict = heuristic_action(observation_dict)
         valid_types = {"work", "break", "switch", "delay"}
         if action_dict.get("type") not in valid_types:
             action_dict = {"type": "delay", "task_id": None}
         action_str = json.dumps(action_dict, separators=(",", ":"))
         try:
+            action          = Action(type=action_dict["type"], task_id=action_dict.get("task_id"))
             obs, reward, done, info = env.step(action)
+            reward          = float(reward)
         except Exception as ex:
             reward    = 0.01
             done      = True
         rewards.append(reward)
         history.append(f"Step {step}: {action_str} -> reward={reward:.2f}")
         log_step(step=step, action=action_str, reward=reward, done=done, error=error_msg)
     score = float(info.get("final_score", 0.0))
     if score == 0.0:
         score = deterministic_grader(env.state.tasks, env.state.time_step, env.state.energy)
     return score
+# ── Main — runs ALL 3 tasks so validator sees 3 graded tasks ──────────────────
 def main():
+    # Run all 3 difficulty levels — validator needs at least 3 tasks graded
+    levels = ["easy", "medium", "hard"]
+    all_scores = {}
+    for level in levels:
+        try:
+            score = run_task(level)
+            all_scores[level] = score
+        except Exception as ex:
+            print(f"[ERROR] task={level} error={str(ex)[:80]}", flush=True)
+            all_scores[level] = 0.01
+    avg = max(0.01, min(0.99, sum(all_scores.values()) / len(all_scores)))
+    print(f"[SUMMARY] scores={json.dumps(all_scores)} average={avg:.3f}", flush=True)
 if __name__ == "__main__":