Spaces:
Sleeping
Sleeping
fix: corrected C++ memory clearance, enforced regex compliance, and bootstrapped RL state
Browse files- final_check.py +73 -25
- hf auditor/src/reconciliation_engine.hpp +12 -3
- inference.py +44 -44
- server/app.py +41 -2
- server/fin_auditor_environment.py +15 -5
final_check.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
import os
|
| 3 |
import sys
|
|
|
|
| 4 |
import json
|
| 5 |
import yaml
|
| 6 |
import unittest
|
|
@@ -29,25 +30,25 @@ class FinalIntegrityCheck(unittest.TestCase):
|
|
| 29 |
print("\n[TEST 1] LLM Parser Robustness...")
|
| 30 |
|
| 31 |
# Test Case A: Markdown wrapped JSON
|
| 32 |
-
dirty_json = "Here is the result:\n```json\n{\"decisions\": [0, 1,
|
| 33 |
res = inference._parse_llm_decisions(dirty_json, 3)
|
| 34 |
-
self.assertEqual(res, [0, 1,
|
| 35 |
|
| 36 |
# Test Case B: Extra text before JSON
|
| 37 |
-
extra_text = "The decisions are as follows: {\"decisions\": [1,
|
| 38 |
res = inference._parse_llm_decisions(extra_text, 2)
|
| 39 |
-
self.assertEqual(res, [1,
|
| 40 |
|
| 41 |
-
# Test Case C: Malformed JSON -> should trigger 'Flag All' (
|
| 42 |
malformed = "{\"decisions\": [0, 1, " # Missing closing bracket
|
| 43 |
res = inference._parse_llm_decisions(malformed, 4)
|
| 44 |
-
self.assertEqual(res, [
|
| 45 |
|
| 46 |
# Test Case D: Correct length normalization
|
| 47 |
wrong_len = "{\"decisions\": [1]}"
|
| 48 |
res = inference._parse_llm_decisions(wrong_len, 3)
|
| 49 |
self.assertEqual(len(res), 3, "Failed to normalize decision list length")
|
| 50 |
-
self.assertEqual(res, [1,
|
| 51 |
|
| 52 |
print("β LLM Parser logic is robust.")
|
| 53 |
|
|
@@ -71,30 +72,59 @@ class FinalIntegrityCheck(unittest.TestCase):
|
|
| 71 |
print(f"β Spec matches. Found {len(tasks)} tasks.")
|
| 72 |
|
| 73 |
def test_3_reward_boundary(self):
|
| 74 |
-
"""Verify environment rewards stay strictly within
|
| 75 |
print("\n[TEST 3] Reward Boundary Check...")
|
| 76 |
env = FinAuditorEnvironment()
|
| 77 |
obs = env.reset()
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
# Simulate a step with some decisions
|
| 80 |
-
action = AuditorAction(decisions=[
|
| 81 |
new_obs = env.step(action)
|
| 82 |
-
|
| 83 |
reward = new_obs.reward
|
| 84 |
self.assertIsNotNone(reward)
|
| 85 |
-
self.
|
| 86 |
-
self.
|
| 87 |
|
| 88 |
print(f"β Reward boundary is safe: {reward}")
|
| 89 |
|
| 90 |
-
def
|
| 91 |
-
"""
|
| 92 |
-
print("\n[TEST 4]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
# Mock the OpenAI client response
|
| 95 |
mock_response = MagicMock()
|
| 96 |
mock_response.choices = [MagicMock()]
|
| 97 |
-
mock_response.choices[0].message.content = json.dumps({"decisions": [
|
| 98 |
|
| 99 |
with patch("inference._client.chat.completions.create", return_value=mock_response):
|
| 100 |
f = io.StringIO()
|
|
@@ -102,18 +132,36 @@ class FinalIntegrityCheck(unittest.TestCase):
|
|
| 102 |
inference.run_inference()
|
| 103 |
|
| 104 |
output = f.getvalue()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
# Verify
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
self.
|
| 114 |
-
self.
|
| 115 |
|
| 116 |
-
print("β
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
| 119 |
unittest.main(verbosity=1)
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
import os
|
| 3 |
import sys
|
| 4 |
+
import re
|
| 5 |
import json
|
| 6 |
import yaml
|
| 7 |
import unittest
|
|
|
|
| 30 |
print("\n[TEST 1] LLM Parser Robustness...")
|
| 31 |
|
| 32 |
# Test Case A: Markdown wrapped JSON
|
| 33 |
+
dirty_json = "Here is the result:\n```json\n{\"decisions\": [0, 1, 1]}\n```\nHope this helps!"
|
| 34 |
res = inference._parse_llm_decisions(dirty_json, 3)
|
| 35 |
+
self.assertEqual(res, [0, 1, 1], "Failed to parse markdown-wrapped JSON")
|
| 36 |
|
| 37 |
# Test Case B: Extra text before JSON
|
| 38 |
+
extra_text = "The decisions are as follows: {\"decisions\": [1, 1]}"
|
| 39 |
res = inference._parse_llm_decisions(extra_text, 2)
|
| 40 |
+
self.assertEqual(res, [1, 1], "Failed to parse JSON with leading text")
|
| 41 |
|
| 42 |
+
# Test Case C: Malformed JSON -> should trigger 'Flag All' (1) fallback
|
| 43 |
malformed = "{\"decisions\": [0, 1, " # Missing closing bracket
|
| 44 |
res = inference._parse_llm_decisions(malformed, 4)
|
| 45 |
+
self.assertEqual(res, [1, 1, 1, 1], "Failed to trigger fallback on malformed JSON")
|
| 46 |
|
| 47 |
# Test Case D: Correct length normalization
|
| 48 |
wrong_len = "{\"decisions\": [1]}"
|
| 49 |
res = inference._parse_llm_decisions(wrong_len, 3)
|
| 50 |
self.assertEqual(len(res), 3, "Failed to normalize decision list length")
|
| 51 |
+
self.assertEqual(res, [1, 1, 1], "Failed to pad short decision list with 1s")
|
| 52 |
|
| 53 |
print("β LLM Parser logic is robust.")
|
| 54 |
|
|
|
|
| 72 |
print(f"β Spec matches. Found {len(tasks)} tasks.")
|
| 73 |
|
| 74 |
def test_3_reward_boundary(self):
|
| 75 |
+
"""Verify environment rewards stay strictly within (0.0, 1.0)"""
|
| 76 |
print("\n[TEST 3] Reward Boundary Check...")
|
| 77 |
env = FinAuditorEnvironment()
|
| 78 |
obs = env.reset()
|
| 79 |
|
| 80 |
+
# Reset should return features now (not empty)
|
| 81 |
+
self.assertGreater(len(obs.features), 0, "Reset should return features for step 1")
|
| 82 |
+
|
| 83 |
# Simulate a step with some decisions
|
| 84 |
+
action = AuditorAction(decisions=[1] * len(obs.features))
|
| 85 |
new_obs = env.step(action)
|
| 86 |
+
|
| 87 |
reward = new_obs.reward
|
| 88 |
self.assertIsNotNone(reward)
|
| 89 |
+
self.assertGreater(reward, 0.0, f"Reward {reward} must be > 0.0 (not exact boundary)")
|
| 90 |
+
self.assertLess(reward, 1.0, f"Reward {reward} must be < 1.0 (not exact boundary)")
|
| 91 |
|
| 92 |
print(f"β Reward boundary is safe: {reward}")
|
| 93 |
|
| 94 |
+
def test_4_reward_varies_by_action(self):
|
| 95 |
+
"""Verify rewards differ between optimal and random agents"""
|
| 96 |
+
print("\n[TEST 4] Reward Variation Check...")
|
| 97 |
+
|
| 98 |
+
# Run with all-1 decisions (flag everything)
|
| 99 |
+
env1 = FinAuditorEnvironment()
|
| 100 |
+
obs1 = env1.reset()
|
| 101 |
+
action1 = AuditorAction(decisions=[1] * len(obs1.features))
|
| 102 |
+
result1 = env1.step(action1)
|
| 103 |
+
reward1 = result1.reward
|
| 104 |
+
|
| 105 |
+
# Run with all-0 decisions (pass everything)
|
| 106 |
+
env2 = FinAuditorEnvironment()
|
| 107 |
+
obs2 = env2.reset()
|
| 108 |
+
action2 = AuditorAction(decisions=[0] * len(obs2.features))
|
| 109 |
+
result2 = env2.step(action2)
|
| 110 |
+
reward2 = result2.reward
|
| 111 |
+
|
| 112 |
+
print(f" All-flag reward: {reward1:.4f}")
|
| 113 |
+
print(f" All-pass reward: {reward2:.4f}")
|
| 114 |
+
|
| 115 |
+
# In EASY mode (100% anomalies), flagging everything should score higher
|
| 116 |
+
self.assertNotEqual(reward1, reward2, "Rewards must differ between flag-all and pass-all")
|
| 117 |
+
|
| 118 |
+
print("β Rewards vary based on agent decisions.")
|
| 119 |
+
|
| 120 |
+
def test_5_stdout_format(self):
|
| 121 |
+
"""Run a 2-step inference and verify stdout matches hackathon regex"""
|
| 122 |
+
print("\n[TEST 5] Stdout Format Compliance...")
|
| 123 |
|
| 124 |
# Mock the OpenAI client response
|
| 125 |
mock_response = MagicMock()
|
| 126 |
mock_response.choices = [MagicMock()]
|
| 127 |
+
mock_response.choices[0].message.content = json.dumps({"reasoning": "test", "decisions": [1] * 200})
|
| 128 |
|
| 129 |
with patch("inference._client.chat.completions.create", return_value=mock_response):
|
| 130 |
f = io.StringIO()
|
|
|
|
| 132 |
inference.run_inference()
|
| 133 |
|
| 134 |
output = f.getvalue()
|
| 135 |
+
lines = [l for l in output.strip().split("\n") if l.strip()]
|
| 136 |
+
|
| 137 |
+
# Verify START tag format
|
| 138 |
+
start_line = lines[0]
|
| 139 |
+
start_match = re.match(r'^\[START\] task=\S+ env=\S+ model=\S+$', start_line)
|
| 140 |
+
self.assertIsNotNone(start_match, f"START line doesn't match regex: {start_line}")
|
| 141 |
+
|
| 142 |
+
# Verify STEP tag format
|
| 143 |
+
step_lines = [l for l in lines if l.startswith("[STEP]")]
|
| 144 |
+
self.assertTrue(len(step_lines) >= 1, "No STEP lines found")
|
| 145 |
+
for sl in step_lines:
|
| 146 |
+
step_match = re.match(
|
| 147 |
+
r'^\[STEP\] step=\d+ action=\S+ reward=\d+\.\d{2} done=(true|false) error=\S+$',
|
| 148 |
+
sl
|
| 149 |
+
)
|
| 150 |
+
self.assertIsNotNone(step_match, f"STEP line doesn't match regex: {sl}")
|
| 151 |
|
| 152 |
+
# Verify END tag format
|
| 153 |
+
end_line = lines[-1]
|
| 154 |
+
end_match = re.match(
|
| 155 |
+
r'^\[END\] success=(true|false) steps=\d+ rewards=[\d.,]+$',
|
| 156 |
+
end_line
|
| 157 |
+
)
|
| 158 |
+
self.assertIsNotNone(end_match, f"END line doesn't match regex: {end_line}")
|
| 159 |
|
| 160 |
+
# Verify NO JSON on stdout
|
| 161 |
+
self.assertNotIn("{", output, "Stdout must not contain JSON braces")
|
| 162 |
+
self.assertNotIn("}", output, "Stdout must not contain JSON braces")
|
| 163 |
|
| 164 |
+
print("β Stdout format is compliant with hackathon regex rules.")
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
| 167 |
unittest.main(verbosity=1)
|
hf auditor/src/reconciliation_engine.hpp
CHANGED
|
@@ -395,9 +395,9 @@ public:
|
|
| 395 |
out[2] = missing_freq;
|
| 396 |
out[3] = static_cast<float>(slot.counterparty_id % 100) / 100.0f;
|
| 397 |
|
| 398 |
-
//
|
| 399 |
-
//
|
| 400 |
-
|
| 401 |
++row;
|
| 402 |
}
|
| 403 |
|
|
@@ -465,6 +465,15 @@ public:
|
|
| 465 |
total_reward += r;
|
| 466 |
}
|
| 467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
return total_reward;
|
| 469 |
}
|
| 470 |
|
|
|
|
| 395 |
out[2] = missing_freq;
|
| 396 |
out[3] = static_cast<float>(slot.counterparty_id % 100) / 100.0f;
|
| 397 |
|
| 398 |
+
// NOTE: Do NOT clear the slot here. The ground truth label must
|
| 399 |
+
// remain readable until compute_reward() processes the agent's
|
| 400 |
+
// decisions. Clearing happens inside compute_reward() instead.
|
| 401 |
++row;
|
| 402 |
}
|
| 403 |
|
|
|
|
| 465 |
total_reward += r;
|
| 466 |
}
|
| 467 |
|
| 468 |
+
// Now that rewards are computed, clear the expired slots so they
|
| 469 |
+
// don't reappear in the next get_anomaly_matrix() call.
|
| 470 |
+
for (size_t i = 0; i < num_actions; ++i) {
|
| 471 |
+
const uint32_t idx = expired_buffer_[i];
|
| 472 |
+
if (pool_.get_state(idx) == SlotState::EXPIRED) {
|
| 473 |
+
pool_.set_state(idx, SlotState::EMPTY);
|
| 474 |
+
}
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
return total_reward;
|
| 478 |
}
|
| 479 |
|
inference.py
CHANGED
|
@@ -1,10 +1,19 @@
|
|
| 1 |
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import sys
|
| 5 |
import json
|
| 6 |
import re
|
| 7 |
-
import datetime
|
| 8 |
import traceback
|
| 9 |
import time
|
| 10 |
from typing import List
|
|
@@ -27,6 +36,10 @@ except ImportError:
|
|
| 27 |
|
| 28 |
from models import AuditorAction
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
class LLMResponse(BaseModel):
|
| 31 |
reasoning: str
|
| 32 |
decisions: List[int]
|
|
@@ -39,6 +52,7 @@ if not HF_TOKEN:
|
|
| 39 |
raise ValueError("CRITICAL: HF_TOKEN environment variable is missing.")
|
| 40 |
|
| 41 |
TASK_ID: str = os.getenv("TASK_ID", "anomaly_detection_hard")
|
|
|
|
| 42 |
|
| 43 |
# FIX: Sync the inference max_steps default with the active task
|
| 44 |
if "easy" in TASK_ID.lower():
|
|
@@ -72,9 +86,6 @@ Example:
|
|
| 72 |
{"reasoning": "Trade 1 has high risk. Trade 2 is safe.", "decisions": [1, 0, 1]}
|
| 73 |
"""
|
| 74 |
|
| 75 |
-
def _ts() -> str:
|
| 76 |
-
return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
| 77 |
-
|
| 78 |
def _build_user_prompt(step: int, features: list[list[float]]) -> str:
|
| 79 |
lines = [
|
| 80 |
f"Step {step}: You have {len(features)} flagged trades to audit.",
|
|
@@ -154,8 +165,10 @@ def _call_llm(step: int, features: list[list[float]]) -> list[int]:
|
|
| 154 |
content = response.choices[0].message.content or ""
|
| 155 |
return _parse_llm_decisions(content, len(features))
|
| 156 |
except Exception as e:
|
|
|
|
| 157 |
time.sleep(1)
|
| 158 |
|
|
|
|
| 159 |
fallback_decisions = []
|
| 160 |
for row in features:
|
| 161 |
if len(row) >= 4:
|
|
@@ -166,72 +179,59 @@ def _call_llm(step: int, features: list[list[float]]) -> list[int]:
|
|
| 166 |
return fallback_decisions
|
| 167 |
|
| 168 |
def run_inference() -> None:
|
| 169 |
-
episode_id: str = "unknown"
|
| 170 |
-
total_reward: float = 0.0
|
| 171 |
steps_completed: int = 0
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
try:
|
| 175 |
env = FinAuditorEnvironment()
|
| 176 |
obs = env.reset()
|
| 177 |
-
episode_id = getattr(env.state, 'episode_id', "test_run")
|
| 178 |
|
| 179 |
-
|
| 180 |
-
"episode_id": episode_id,
|
| 181 |
-
"model": MODEL_NAME,
|
| 182 |
-
"difficulty": TASK_ID,
|
| 183 |
-
"max_steps": MAX_STEPS
|
| 184 |
-
}
|
| 185 |
-
print(f"[START] {json.dumps(start_payload)}", flush=True)
|
| 186 |
|
| 187 |
for step_num in range(1, MAX_STEPS + 1):
|
| 188 |
-
step_reward = 0.0
|
| 189 |
features = obs.features
|
| 190 |
|
| 191 |
if not features:
|
| 192 |
action = AuditorAction(decisions=[])
|
| 193 |
-
|
| 194 |
else:
|
| 195 |
decisions = _call_llm(step_num, features)
|
| 196 |
action = AuditorAction(decisions=decisions)
|
| 197 |
|
| 198 |
obs = env.step(action)
|
| 199 |
step_reward = obs.reward if obs.reward is not None else 0.0
|
| 200 |
-
|
| 201 |
steps_completed = step_num
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
"done": bool(obs.done),
|
| 210 |
-
"error": None,
|
| 211 |
-
"reasoning": _last_reasoning[:120].replace('\n', ' ') + "...",
|
| 212 |
-
"tp": getattr(env.state, 'last_tp', 0),
|
| 213 |
-
"tn": getattr(env.state, 'last_tn', 0),
|
| 214 |
-
"fp": getattr(env.state, 'last_fp', 0),
|
| 215 |
-
"fn": getattr(env.state, 'last_fn', 0)
|
| 216 |
-
}
|
| 217 |
-
print(f"[STEP] {json.dumps(step_payload)}", flush=True)
|
| 218 |
|
| 219 |
if obs.done:
|
| 220 |
break
|
| 221 |
|
|
|
|
|
|
|
| 222 |
except KeyboardInterrupt:
|
| 223 |
-
|
|
|
|
| 224 |
except Exception as exc:
|
| 225 |
-
|
| 226 |
-
traceback.
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"status": status
|
| 233 |
-
}
|
| 234 |
-
print(f"[END] {json.dumps(end_payload)}", flush=True)
|
| 235 |
|
| 236 |
if __name__ == "__main__":
|
| 237 |
run_inference()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
# inference.py β OpenEnv Evaluation Script (Hackathon Submission)
|
| 4 |
+
#
|
| 5 |
+
# STDOUT FORMAT (strict regex compliance):
|
| 6 |
+
# [START] task=<task_name> env=<benchmark> model=<model_name>
|
| 7 |
+
# [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
|
| 8 |
+
# [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
|
| 9 |
+
#
|
| 10 |
+
# ALL debug output goes to stderr. NO JSON on stdout. NO extra whitespace.
|
| 11 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 12 |
|
| 13 |
import os
|
| 14 |
import sys
|
| 15 |
import json
|
| 16 |
import re
|
|
|
|
| 17 |
import traceback
|
| 18 |
import time
|
| 19 |
from typing import List
|
|
|
|
| 36 |
|
| 37 |
from models import AuditorAction
|
| 38 |
|
| 39 |
+
# ββ Debug logger: ONLY to stderr βββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
+
def _dbg(msg: str) -> None:
|
| 41 |
+
print(msg, file=sys.stderr, flush=True)
|
| 42 |
+
|
| 43 |
class LLMResponse(BaseModel):
|
| 44 |
reasoning: str
|
| 45 |
decisions: List[int]
|
|
|
|
| 52 |
raise ValueError("CRITICAL: HF_TOKEN environment variable is missing.")
|
| 53 |
|
| 54 |
TASK_ID: str = os.getenv("TASK_ID", "anomaly_detection_hard")
|
| 55 |
+
ENV_NAME: str = "fin_auditor"
|
| 56 |
|
| 57 |
# FIX: Sync the inference max_steps default with the active task
|
| 58 |
if "easy" in TASK_ID.lower():
|
|
|
|
| 86 |
{"reasoning": "Trade 1 has high risk. Trade 2 is safe.", "decisions": [1, 0, 1]}
|
| 87 |
"""
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
def _build_user_prompt(step: int, features: list[list[float]]) -> str:
|
| 90 |
lines = [
|
| 91 |
f"Step {step}: You have {len(features)} flagged trades to audit.",
|
|
|
|
| 165 |
content = response.choices[0].message.content or ""
|
| 166 |
return _parse_llm_decisions(content, len(features))
|
| 167 |
except Exception as e:
|
| 168 |
+
_dbg(f"[LLM RETRY {attempt+1}/{max_retries}] {e}")
|
| 169 |
time.sleep(1)
|
| 170 |
|
| 171 |
+
_dbg("[LLM] All retries exhausted, using risk_score fallback")
|
| 172 |
fallback_decisions = []
|
| 173 |
for row in features:
|
| 174 |
if len(row) >= 4:
|
|
|
|
| 179 |
return fallback_decisions
|
| 180 |
|
| 181 |
def run_inference() -> None:
|
|
|
|
|
|
|
| 182 |
steps_completed: int = 0
|
| 183 |
+
all_rewards: list[float] = []
|
| 184 |
+
success: bool = False
|
| 185 |
+
error_msg: str | None = None
|
| 186 |
+
|
| 187 |
+
# ββ [START] β always emitted ββββββββββββββββββββββββββββββββββββββββββ
|
| 188 |
+
print(f"[START] task={TASK_ID} env={ENV_NAME} model={MODEL_NAME}", flush=True)
|
| 189 |
|
| 190 |
try:
|
| 191 |
env = FinAuditorEnvironment()
|
| 192 |
obs = env.reset()
|
|
|
|
| 193 |
|
| 194 |
+
_dbg(f"[DBG] Episode started. Features: {len(obs.features)} rows, difficulty: {TASK_ID}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
for step_num in range(1, MAX_STEPS + 1):
|
| 197 |
+
step_reward = 0.0
|
| 198 |
features = obs.features
|
| 199 |
|
| 200 |
if not features:
|
| 201 |
action = AuditorAction(decisions=[])
|
| 202 |
+
_dbg(f"[DBG] Step {step_num}: Empty feature matrix")
|
| 203 |
else:
|
| 204 |
decisions = _call_llm(step_num, features)
|
| 205 |
action = AuditorAction(decisions=decisions)
|
| 206 |
|
| 207 |
obs = env.step(action)
|
| 208 |
step_reward = obs.reward if obs.reward is not None else 0.0
|
| 209 |
+
all_rewards.append(step_reward)
|
| 210 |
steps_completed = step_num
|
| 211 |
|
| 212 |
+
# ββ [STEP] β plain text, 2 decimal places, lowercase bools ββββ
|
| 213 |
+
action_str = ",".join(str(d) for d in action.decisions) if action.decisions else "none"
|
| 214 |
+
done_str = "true" if obs.done else "false"
|
| 215 |
+
print(f"[STEP] step={step_num} action={action_str} reward={step_reward:.2f} done={done_str} error=null", flush=True)
|
| 216 |
+
|
| 217 |
+
_dbg(f"[DBG] Step {step_num}: reward={step_reward:.4f}, features={len(obs.features)}, done={obs.done}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
|
| 219 |
if obs.done:
|
| 220 |
break
|
| 221 |
|
| 222 |
+
success = True
|
| 223 |
+
|
| 224 |
except KeyboardInterrupt:
|
| 225 |
+
error_msg = "interrupted"
|
| 226 |
+
_dbg("[DBG] Interrupted by user")
|
| 227 |
except Exception as exc:
|
| 228 |
+
error_msg = str(exc).replace("\n", " ")[:80]
|
| 229 |
+
_dbg(f"[ERROR] {traceback.format_exc()}")
|
| 230 |
+
finally:
|
| 231 |
+
# ββ [END] β ALWAYS emitted, even on crash ββββββββββββββββββββββββ
|
| 232 |
+
success_str = "true" if success else "false"
|
| 233 |
+
rewards_str = ",".join(f"{r:.2f}" for r in all_rewards) if all_rewards else "0.00"
|
| 234 |
+
print(f"[END] success={success_str} steps={steps_completed} rewards={rewards_str}", flush=True)
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
if __name__ == "__main__":
|
| 237 |
run_inference()
|
server/app.py
CHANGED
|
@@ -66,6 +66,35 @@ else:
|
|
| 66 |
|
| 67 |
app_metrics = {"last_step_latency_us": 0.0}
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
@app.middleware("http")
|
| 70 |
async def capture_step_latency(request: Request, call_next):
|
| 71 |
if request.url.path == "/step":
|
|
@@ -711,7 +740,14 @@ async def root_dashboard():
|
|
| 711 |
// FIX: Robust payload extraction handling regardless of OpenEnv wrapper depth
|
| 712 |
const reward = data.reward ?? data.observation?.reward ?? data.info?.reward ?? 0.0;
|
| 713 |
const done = data.done ?? data.observation?.done ?? data.info?.done ?? false;
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
logMsg(`[RECON] Reward: ${reward.toFixed(4)} | Success`, reward >= 0.8 ? 'success' : 'warn');
|
| 717 |
|
|
@@ -732,10 +768,13 @@ async def root_dashboard():
|
|
| 732 |
}
|
| 733 |
}
|
| 734 |
|
| 735 |
-
//
|
|
|
|
| 736 |
window.addEventListener('DOMContentLoaded', async () => {
|
| 737 |
logMsg("Auto-initializing environment engine...", "info");
|
| 738 |
await executeReset();
|
|
|
|
|
|
|
| 739 |
setInterval(updateState, 1000);
|
| 740 |
updateState();
|
| 741 |
});
|
|
|
|
| 66 |
|
| 67 |
app_metrics = {"last_step_latency_us": 0.0}
|
| 68 |
|
| 69 |
+
# ββ Auto-bootstrap on startup ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 70 |
+
@app.on_event("startup")
|
| 71 |
+
async def auto_bootstrap():
|
| 72 |
+
"""Auto-authenticate with HF_TOKEN and initialize engine on boot."""
|
| 73 |
+
token = os.getenv("HF_TOKEN", "")
|
| 74 |
+
if token:
|
| 75 |
+
try:
|
| 76 |
+
from openai import AsyncOpenAI
|
| 77 |
+
client = AsyncOpenAI(base_url="https://router.huggingface.co/v1", api_key=token, max_retries=2)
|
| 78 |
+
try:
|
| 79 |
+
response = await client.models.list()
|
| 80 |
+
model_list = [m.id for m in response.data]
|
| 81 |
+
except Exception:
|
| 82 |
+
model_list = ["meta-llama/Meta-Llama-3-8B-Instruct"]
|
| 83 |
+
|
| 84 |
+
llm_session["api_key"] = token
|
| 85 |
+
llm_session["base_url"] = "https://router.huggingface.co/v1"
|
| 86 |
+
llm_session["available_models"] = model_list
|
| 87 |
+
if model_list:
|
| 88 |
+
llm_session["model_name"] = model_list[0]
|
| 89 |
+
system_health["key_validated"] = True
|
| 90 |
+
system_health["model_detected"] = len(model_list) > 0
|
| 91 |
+
system_health["connected"] = True
|
| 92 |
+
print(f"[BOOT] Auto-authenticated with HF_TOKEN. {len(model_list)} models discovered.")
|
| 93 |
+
except Exception as e:
|
| 94 |
+
print(f"[BOOT] Auto-auth failed: {e}")
|
| 95 |
+
else:
|
| 96 |
+
print("[BOOT] No HF_TOKEN found. Manual authentication required.")
|
| 97 |
+
|
| 98 |
@app.middleware("http")
|
| 99 |
async def capture_step_latency(request: Request, call_next):
|
| 100 |
if request.url.path == "/step":
|
|
|
|
| 740 |
// FIX: Robust payload extraction handling regardless of OpenEnv wrapper depth
|
| 741 |
const reward = data.reward ?? data.observation?.reward ?? data.info?.reward ?? 0.0;
|
| 742 |
const done = data.done ?? data.observation?.done ?? data.info?.done ?? false;
|
| 743 |
+
|
| 744 |
+
// Fetch the authoritative step count from /state
|
| 745 |
+
let step = 'N/A';
|
| 746 |
+
try {
|
| 747 |
+
const stateRes = await fetch('/state');
|
| 748 |
+
const stateData = await stateRes.json();
|
| 749 |
+
step = stateData.step_count ?? 'N/A';
|
| 750 |
+
} catch(se) {} // Swallow β non-critical
|
| 751 |
|
| 752 |
logMsg(`[RECON] Reward: ${reward.toFixed(4)} | Success`, reward >= 0.8 ? 'success' : 'warn');
|
| 753 |
|
|
|
|
| 768 |
}
|
| 769 |
}
|
| 770 |
|
| 771 |
+
// Auto-Reset the environment on boot so it actually has data to process,
|
| 772 |
+
// then try to authenticate with the default HF_TOKEN
|
| 773 |
window.addEventListener('DOMContentLoaded', async () => {
|
| 774 |
logMsg("Auto-initializing environment engine...", "info");
|
| 775 |
await executeReset();
|
| 776 |
+
logMsg("Attempting default token auth...", "info");
|
| 777 |
+
await useDefault();
|
| 778 |
setInterval(updateState, 1000);
|
| 779 |
updateState();
|
| 780 |
});
|
server/fin_auditor_environment.py
CHANGED
|
@@ -98,14 +98,24 @@ class FinAuditorEnvironment(Environment):
|
|
| 98 |
|
| 99 |
def reset(self) -> AuditorObservation:
|
| 100 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 101 |
-
|
| 102 |
-
#
|
| 103 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
self.engine.tick(self.sim_time_ns)
|
| 105 |
|
|
|
|
|
|
|
|
|
|
| 106 |
return FinAuditorObservation(
|
| 107 |
-
features=
|
| 108 |
-
message="
|
| 109 |
reward=0.001 / self._MAX_EPISODE_STEPS, # Safe fractional minimum
|
| 110 |
done=False
|
| 111 |
)
|
|
|
|
| 98 |
|
| 99 |
def reset(self) -> AuditorObservation:
|
| 100 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 101 |
+
|
| 102 |
+
# Re-initialize the engine for a clean episode
|
| 103 |
+
self.engine = hft_auditor.ReconciliationEngine(self._RING_BUFFER_CAPACITY)
|
| 104 |
+
self.sim_time_ns = 0
|
| 105 |
+
|
| 106 |
+
# Generate the first batch so step 1 has data to evaluate
|
| 107 |
+
self.engine.generate_batch(self.difficulty, self._INGEST_CHUNK_SIZE, self.sim_time_ns)
|
| 108 |
+
|
| 109 |
+
# Advance time past Ξ_max to expire the batch
|
| 110 |
+
self.sim_time_ns += 6_000_000_000
|
| 111 |
self.engine.tick(self.sim_time_ns)
|
| 112 |
|
| 113 |
+
# Get the anomaly matrix for the agent (features for step 1)
|
| 114 |
+
anomalies: list[list[float]] = self.engine.get_anomaly_matrix().tolist()
|
| 115 |
+
|
| 116 |
return FinAuditorObservation(
|
| 117 |
+
features=anomalies,
|
| 118 |
+
message=f"Engine ready. {len(anomalies)} trades awaiting audit.",
|
| 119 |
reward=0.001 / self._MAX_EPISODE_STEPS, # Safe fractional minimum
|
| 120 |
done=False
|
| 121 |
)
|