Somuai12 commited on
Commit
89d39f7
·
1 Parent(s): c8aa313

Final Polish: Task-aware fallbacks and surgical refinement

Browse files
Files changed (1) hide show
  1. inference.py +44 -13
inference.py CHANGED
@@ -15,6 +15,7 @@ STDOUT FORMAT:
15
 
16
  import asyncio
17
  import os
 
18
  import json
19
  from typing import Dict, List, Optional
20
 
@@ -61,7 +62,9 @@ class PolicyEvolverAgent:
61
  "3. MEASURABLE CRITERIA: Define terms with 'if-then' and metrics.\n"
62
  "4. ANALYTICAL COT: Your 'think' field MUST be 150-250 words and include terms: 'tradeoff', 'precision', "
63
  "'recall', 'threshold', 'impact', 'evidence'.\n"
64
- "5. JSON ONLY: Output ONLY the JSON object. No preamble."
 
 
65
  )
66
 
67
  def __init__(self, model: str):
@@ -98,7 +101,10 @@ class PolicyEvolverAgent:
98
  if start != -1 and end != -1:
99
  return json.loads(raw[start : end + 1])
100
  raise
101
- except Exception:
 
 
 
102
  return None
103
 
104
  def _build_feedback(self, step: int, last_score: float, last_action: dict, task_id: str) -> str:
@@ -145,8 +151,20 @@ class PolicyEvolverAgent:
145
  for act, sc in zip(self.action_history[-3:], self.score_history[-3:]):
146
  lines.append(f" [{sc:.2f}] {act.get('action_type', '?')}")
147
 
148
- target = min(last_score + 0.25, 0.95)
149
- lines.append(f"\nYour next proposal MUST score above {target:.2f}.")
 
 
 
 
 
 
 
 
 
 
 
 
150
  return "\n".join(lines)
151
 
152
  def get_action(self, client: OpenAI, task_id: str, obs: dict) -> dict:
@@ -188,15 +206,28 @@ class PolicyEvolverAgent:
188
 
189
  result = self._call_llm(client, prompt)
190
  if result is None:
191
- # Fallback action so we never crash
192
- result = {
193
- "action_type": "propose_clarification",
194
- "ambiguous_term": "offensive",
195
- "suggested_definition": "Content must be classified as offensive if and only if it contains explicit slurs, direct threats of physical violence, or identity-based harassment verifiable within 24 hours.",
196
- "affected_policy_ids": ["pol_001"],
197
- "justification": "Replaces subjective term with measurable criteria.",
198
- "think": "Analyzing the tradeoff between precision and recall for content moderation threshold. Evidence from the corpus shows impact of vague terms on enforcement. Setting a measurable threshold improves consistency.",
199
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  return result
201
 
202
 
 
15
 
16
  import asyncio
17
  import os
18
+ import sys
19
  import json
20
  from typing import Dict, List, Optional
21
 
 
62
  "3. MEASURABLE CRITERIA: Define terms with 'if-then' and metrics.\n"
63
  "4. ANALYTICAL COT: Your 'think' field MUST be 150-250 words and include terms: 'tradeoff', 'precision', "
64
  "'recall', 'threshold', 'impact', 'evidence'.\n"
65
+ "5. JSON ONLY: Output ONLY the JSON object. No preamble.\n"
66
+ "6. INCREMENTALISM: If your previous score was high (>0.80), focus on surgical precision rather than holistic rewriting. "
67
+ "DO NOT add words that create ambiguity."
68
  )
69
 
70
  def __init__(self, model: str):
 
101
  if start != -1 and end != -1:
102
  return json.loads(raw[start : end + 1])
103
  raise
104
+ except Exception as e:
105
+ print(f"[DEBUG] LLM Call Error: {e}", file=sys.stderr)
106
+ if 'raw' in locals():
107
+ print(f"[DEBUG] Raw content: {raw}", file=sys.stderr)
108
  return None
109
 
110
  def _build_feedback(self, step: int, last_score: float, last_action: dict, task_id: str) -> str:
 
151
  for act, sc in zip(self.action_history[-3:], self.score_history[-3:]):
152
  lines.append(f" [{sc:.2f}] {act.get('action_type', '?')}")
153
 
154
+ # Surgical Refinement Guard
155
+ if last_score >= 0.80:
156
+ lines = [
157
+ f"\n=== SURGICAL REFINEMENT (Step {step}) ===",
158
+ f"Current Score: {last_score:.3f} — EXCELLENT.",
159
+ "CRITICAL: Do NOT rewrite the policy. Only perform 'surgical' removals or additions.",
160
+ "1. CHECK: Remove 'might', 'could', 'perhaps', 'sometimes', 'often' if present.",
161
+ "2. CHECK: Ensure words count >= 12. Add one more specific metric (%, hours, $) if needed.",
162
+ "Do NOT add any words that could be seen as vague. Aim for 0.95+."
163
+ ]
164
+ else:
165
+ target = min(last_score + 0.20, 0.95)
166
+ lines.append(f"\nYour next proposal MUST score above {target:.2f}. Be more specific.")
167
+
168
  return "\n".join(lines)
169
 
170
  def get_action(self, client: OpenAI, task_id: str, obs: dict) -> dict:
 
206
 
207
  result = self._call_llm(client, prompt)
208
  if result is None:
209
+ # Task-Aware fallback so we never crash and actions remain valid
210
+ if task_id == "task_easy":
211
+ return {
212
+ "action_type": "propose_clarification",
213
+ "ambiguous_term": "offensive",
214
+ "suggested_definition": "Content is offensive if it contains explicit slurs or direct threats of violence verified by local context.",
215
+ "affected_policy_ids": ["pol_001"], "justification": "Fallback clarification.", "think": "LLM failed - using robust baseline."
216
+ }
217
+ elif task_id == "task_medium":
218
+ return {
219
+ "action_type": "propose_new_rule",
220
+ "rule_domain": "generative_ai_use",
221
+ "new_rule": "Employees must explicitly disclose all generative AI interactions for proprietary code drafting.",
222
+ "scope": ["code", "proprietary"], "integration_points": ["pol_ai_001"], "justification": "Fallback new rule.", "think": "LLM failed - using robust baseline."
223
+ }
224
+ else:
225
+ return {
226
+ "action_type": "evolve_policy",
227
+ "policy_modifications": [{"policy_id": "pol_rev_001", "change_type": "enhance", "new_text": "Apply manual review to high velocity sellers.", "reason": "Systemic safety."}],
228
+ "expected_outcomes": {"fraud_rate": 0.5, "revenue_velocity": 0.5, "seller_trust": 0.5},
229
+ "rollback_conditions": ["If fraud rate peaks"], "justification": "Fallback evolution.", "think": "LLM failed - using robust baseline."
230
+ }
231
  return result
232
 
233