Spaces:

saravanatanjiro
/

Openenv

Paused

App Files Files Community

kavin57447 committed on Apr 25

Commit

deef82c

1 Parent(s): ee5ddee

Fix truncation: 80 tokens, regex safety net, strict prompt

Browse files

Files changed (1) hide show

cloud_arena/llm_training.py +26 -10

cloud_arena/llm_training.py CHANGED Viewed

@@ -29,7 +29,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # ── GPU Optimization Constants ────────────────────────────────────────────────
 GRAD_ACCUM_STEPS = 4       # accumulate gradients over N episodes before stepping
 MAX_SEQ_LEN = 512          # shorter context = O(N²) attention is 4× faster than 1024
-MAX_GEN_TOKENS = 32        # force brief output — just need the ACTION line, not essays
 def format_prompt(state_dict):
@@ -54,22 +54,38 @@ def format_prompt(state_dict):
         f"- Never delete/stop prod resources or those with >=5 deps\n"
         f"- Temp resources with 0-1 deps are safe to delete\n"
         f"- RESIZE is always safe\n\n"
         f"REASONING:"
     )
 def extract_action_and_reasoning(response_text):
     reasoning = response_text.strip()
-    action = 2
-    action_match = re.search(r'ACTION:\s*(\d)', response_text, re.IGNORECASE)
     if action_match:
-        parsed = int(action_match.group(1))
-        if 0 <= parsed <= 4:
-            action = parsed
-    else:
-        digit_matches = re.findall(r'\b([0-4])\b', response_text[-50:])
-        if digit_matches:
-            action = int(digit_matches[-1])
     return action, reasoning

 # ── GPU Optimization Constants ────────────────────────────────────────────────
 GRAD_ACCUM_STEPS = 4       # accumulate gradients over N episodes before stepping
 MAX_SEQ_LEN = 512          # shorter context = O(N²) attention is 4× faster than 1024
+MAX_GEN_TOKENS = 80        # enough room for reasoning + ACTION line, not enough to ramble
 def format_prompt(state_dict):
         f"- Never delete/stop prod resources or those with >=5 deps\n"
         f"- Temp resources with 0-1 deps are safe to delete\n"
         f"- RESIZE is always safe\n\n"
+        f"CRITICAL: Output ONLY a brief reason then ACTION: <number 0-4>. Nothing else.\n\n"
         f"REASONING:"
     )
 def extract_action_and_reasoning(response_text):
+    """Regex safety net: extracts action even from truncated/malformed output."""
     reasoning = response_text.strip()
+    action = 2  # Default to RESIZE (safest action)
+    # Try explicit ACTION: N format first
+    action_match = re.search(r'ACTION:\s*([0-4])', response_text, re.IGNORECASE)
     if action_match:
+        return int(action_match.group(1)), reasoning
+    # Try JSON format: {"action": N}; only an integer N in 0-4 is accepted (string names such as "DELETE" are not mapped and fall through)
+    json_match = re.search(r'\{.*?\}', response_text, re.DOTALL)
+    if json_match:
+        try:
+            import json
+            parsed = json.loads(json_match.group(0))
+            a = parsed.get("action", 2)
+            if isinstance(a, int) and 0 <= a <= 4:
+                return a, reasoning
+        except (json.JSONDecodeError, ValueError):
+            pass
+    # Last resort: any digit 0-4 near the end
+    digit_matches = re.findall(r'\b([0-4])\b', response_text[-30:])
+    if digit_matches:
+        action = int(digit_matches[-1])
     return action, reasoning