SamaKool commited on
Commit
85228ff
Β·
1 Parent(s): 3385186

fix: corrected C++ memory clearance, enforced regex compliance, and bootstrapped RL state

Browse files
final_check.py CHANGED
@@ -1,6 +1,7 @@
1
  #!/usr/bin/env python3
2
  import os
3
  import sys
 
4
  import json
5
  import yaml
6
  import unittest
@@ -29,25 +30,25 @@ class FinalIntegrityCheck(unittest.TestCase):
29
  print("\n[TEST 1] LLM Parser Robustness...")
30
 
31
  # Test Case A: Markdown wrapped JSON
32
- dirty_json = "Here is the result:\n```json\n{\"decisions\": [0, 1, 2]}\n```\nHope this helps!"
33
  res = inference._parse_llm_decisions(dirty_json, 3)
34
- self.assertEqual(res, [0, 1, 2], "Failed to parse markdown-wrapped JSON")
35
 
36
  # Test Case B: Extra text before JSON
37
- extra_text = "The decisions are as follows: {\"decisions\": [1, 2]}"
38
  res = inference._parse_llm_decisions(extra_text, 2)
39
- self.assertEqual(res, [1, 2], "Failed to parse JSON with leading text")
40
 
41
- # Test Case C: Malformed JSON -> should trigger 'Flag All' (2) fallback
42
  malformed = "{\"decisions\": [0, 1, " # Missing closing bracket
43
  res = inference._parse_llm_decisions(malformed, 4)
44
- self.assertEqual(res, [2, 2, 2, 2], "Failed to trigger fallback on malformed JSON")
45
 
46
  # Test Case D: Correct length normalization
47
  wrong_len = "{\"decisions\": [1]}"
48
  res = inference._parse_llm_decisions(wrong_len, 3)
49
  self.assertEqual(len(res), 3, "Failed to normalize decision list length")
50
- self.assertEqual(res, [1, 2, 2], "Failed to pad short decision list with 2s")
51
 
52
  print("βœ“ LLM Parser logic is robust.")
53
 
@@ -71,30 +72,59 @@ class FinalIntegrityCheck(unittest.TestCase):
71
  print(f"βœ“ Spec matches. Found {len(tasks)} tasks.")
72
 
73
  def test_3_reward_boundary(self):
74
- """Verify environment rewards stay strictly within [0.0, 1.0]"""
75
  print("\n[TEST 3] Reward Boundary Check...")
76
  env = FinAuditorEnvironment()
77
  obs = env.reset()
78
 
 
 
 
79
  # Simulate a step with some decisions
80
- action = AuditorAction(decisions=[2] * len(obs.features))
81
  new_obs = env.step(action)
82
-
83
  reward = new_obs.reward
84
  self.assertIsNotNone(reward)
85
- self.assertGreaterEqual(reward, 0.0, f"Reward {reward} < 0.0")
86
- self.assertLessEqual(reward, 1.0, f"Reward {reward} > 1.0")
87
 
88
  print(f"βœ“ Reward boundary is safe: {reward}")
89
 
90
- def test_4_integration_dry_run(self):
91
- """Run a 2-step inference using a mocked OpenAI client"""
92
- print("\n[TEST 4] Integration Dry Run...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  # Mock the OpenAI client response
95
  mock_response = MagicMock()
96
  mock_response.choices = [MagicMock()]
97
- mock_response.choices[0].message.content = json.dumps({"decisions": [2] * 200}) # provide plenty
98
 
99
  with patch("inference._client.chat.completions.create", return_value=mock_response):
100
  f = io.StringIO()
@@ -102,18 +132,36 @@ class FinalIntegrityCheck(unittest.TestCase):
102
  inference.run_inference()
103
 
104
  output = f.getvalue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # Verify structured logs appear
107
- self.assertIn("[START]", output)
108
- self.assertIn("[STEP] step=1", output)
109
- self.assertIn("[STEP] step=2", output)
110
- self.assertIn("[END]", output)
 
 
111
 
112
- # Check if rewards were logs
113
- self.assertIn("reward=", output)
114
- self.assertIn("cumulative_reward=", output)
115
 
116
- print("βœ“ Integration dry run successful. Logs are correctly formatted.")
117
 
118
  if __name__ == "__main__":
119
  unittest.main(verbosity=1)
 
1
  #!/usr/bin/env python3
2
  import os
3
  import sys
4
+ import re
5
  import json
6
  import yaml
7
  import unittest
 
30
  print("\n[TEST 1] LLM Parser Robustness...")
31
 
32
  # Test Case A: Markdown wrapped JSON
33
+ dirty_json = "Here is the result:\n```json\n{\"decisions\": [0, 1, 1]}\n```\nHope this helps!"
34
  res = inference._parse_llm_decisions(dirty_json, 3)
35
+ self.assertEqual(res, [0, 1, 1], "Failed to parse markdown-wrapped JSON")
36
 
37
  # Test Case B: Extra text before JSON
38
+ extra_text = "The decisions are as follows: {\"decisions\": [1, 1]}"
39
  res = inference._parse_llm_decisions(extra_text, 2)
40
+ self.assertEqual(res, [1, 1], "Failed to parse JSON with leading text")
41
 
42
+ # Test Case C: Malformed JSON -> should trigger 'Flag All' (1) fallback
43
  malformed = "{\"decisions\": [0, 1, " # Missing closing bracket
44
  res = inference._parse_llm_decisions(malformed, 4)
45
+ self.assertEqual(res, [1, 1, 1, 1], "Failed to trigger fallback on malformed JSON")
46
 
47
  # Test Case D: Correct length normalization
48
  wrong_len = "{\"decisions\": [1]}"
49
  res = inference._parse_llm_decisions(wrong_len, 3)
50
  self.assertEqual(len(res), 3, "Failed to normalize decision list length")
51
+ self.assertEqual(res, [1, 1, 1], "Failed to pad short decision list with 1s")
52
 
53
  print("βœ“ LLM Parser logic is robust.")
54
 
 
72
  print(f"βœ“ Spec matches. Found {len(tasks)} tasks.")
73
 
74
  def test_3_reward_boundary(self):
75
+ """Verify environment rewards stay strictly within (0.0, 1.0)"""
76
  print("\n[TEST 3] Reward Boundary Check...")
77
  env = FinAuditorEnvironment()
78
  obs = env.reset()
79
 
80
+ # Reset should return features now (not empty)
81
+ self.assertGreater(len(obs.features), 0, "Reset should return features for step 1")
82
+
83
  # Simulate a step with some decisions
84
+ action = AuditorAction(decisions=[1] * len(obs.features))
85
  new_obs = env.step(action)
86
+
87
  reward = new_obs.reward
88
  self.assertIsNotNone(reward)
89
+ self.assertGreater(reward, 0.0, f"Reward {reward} must be > 0.0 (not exact boundary)")
90
+ self.assertLess(reward, 1.0, f"Reward {reward} must be < 1.0 (not exact boundary)")
91
 
92
  print(f"βœ“ Reward boundary is safe: {reward}")
93
 
94
+ def test_4_reward_varies_by_action(self):
95
+ """Verify rewards differ between optimal and random agents"""
96
+ print("\n[TEST 4] Reward Variation Check...")
97
+
98
+ # Run with all-1 decisions (flag everything)
99
+ env1 = FinAuditorEnvironment()
100
+ obs1 = env1.reset()
101
+ action1 = AuditorAction(decisions=[1] * len(obs1.features))
102
+ result1 = env1.step(action1)
103
+ reward1 = result1.reward
104
+
105
+ # Run with all-0 decisions (pass everything)
106
+ env2 = FinAuditorEnvironment()
107
+ obs2 = env2.reset()
108
+ action2 = AuditorAction(decisions=[0] * len(obs2.features))
109
+ result2 = env2.step(action2)
110
+ reward2 = result2.reward
111
+
112
+ print(f" All-flag reward: {reward1:.4f}")
113
+ print(f" All-pass reward: {reward2:.4f}")
114
+
115
+ # In EASY mode (100% anomalies), flagging everything should score higher
116
+ self.assertNotEqual(reward1, reward2, "Rewards must differ between flag-all and pass-all")
117
+
118
+ print("βœ“ Rewards vary based on agent decisions.")
119
+
120
+ def test_5_stdout_format(self):
121
+ """Run a 2-step inference and verify stdout matches hackathon regex"""
122
+ print("\n[TEST 5] Stdout Format Compliance...")
123
 
124
  # Mock the OpenAI client response
125
  mock_response = MagicMock()
126
  mock_response.choices = [MagicMock()]
127
+ mock_response.choices[0].message.content = json.dumps({"reasoning": "test", "decisions": [1] * 200})
128
 
129
  with patch("inference._client.chat.completions.create", return_value=mock_response):
130
  f = io.StringIO()
 
132
  inference.run_inference()
133
 
134
  output = f.getvalue()
135
+ lines = [l for l in output.strip().split("\n") if l.strip()]
136
+
137
+ # Verify START tag format
138
+ start_line = lines[0]
139
+ start_match = re.match(r'^\[START\] task=\S+ env=\S+ model=\S+$', start_line)
140
+ self.assertIsNotNone(start_match, f"START line doesn't match regex: {start_line}")
141
+
142
+ # Verify STEP tag format
143
+ step_lines = [l for l in lines if l.startswith("[STEP]")]
144
+ self.assertTrue(len(step_lines) >= 1, "No STEP lines found")
145
+ for sl in step_lines:
146
+ step_match = re.match(
147
+ r'^\[STEP\] step=\d+ action=\S+ reward=\d+\.\d{2} done=(true|false) error=\S+$',
148
+ sl
149
+ )
150
+ self.assertIsNotNone(step_match, f"STEP line doesn't match regex: {sl}")
151
 
152
+ # Verify END tag format
153
+ end_line = lines[-1]
154
+ end_match = re.match(
155
+ r'^\[END\] success=(true|false) steps=\d+ rewards=[\d.,]+$',
156
+ end_line
157
+ )
158
+ self.assertIsNotNone(end_match, f"END line doesn't match regex: {end_line}")
159
 
160
+ # Verify NO JSON on stdout
161
+ self.assertNotIn("{", output, "Stdout must not contain JSON braces")
162
+ self.assertNotIn("}", output, "Stdout must not contain JSON braces")
163
 
164
+ print("βœ“ Stdout format is compliant with hackathon regex rules.")
165
 
166
  if __name__ == "__main__":
167
  unittest.main(verbosity=1)
hf auditor/src/reconciliation_engine.hpp CHANGED
@@ -395,9 +395,9 @@ public:
395
  out[2] = missing_freq;
396
  out[3] = static_cast<float>(slot.counterparty_id % 100) / 100.0f;
397
 
398
- // CRITICAL: Mark slot as EMPTY after reporting to Python so it
399
- // doesn't reappear in the next processing step.
400
- pool_.set_state(idx, SlotState::EMPTY);
401
  ++row;
402
  }
403
 
@@ -465,6 +465,15 @@ public:
465
  total_reward += r;
466
  }
467
 
 
 
 
 
 
 
 
 
 
468
  return total_reward;
469
  }
470
 
 
395
  out[2] = missing_freq;
396
  out[3] = static_cast<float>(slot.counterparty_id % 100) / 100.0f;
397
 
398
+ // NOTE: Do NOT clear the slot here. The ground truth label must
399
+ // remain readable until compute_reward() processes the agent's
400
+ // decisions. Clearing happens inside compute_reward() instead.
401
  ++row;
402
  }
403
 
 
465
  total_reward += r;
466
  }
467
 
468
+ // Now that rewards are computed, clear the expired slots so they
469
+ // don't reappear in the next get_anomaly_matrix() call.
470
+ for (size_t i = 0; i < num_actions; ++i) {
471
+ const uint32_t idx = expired_buffer_[i];
472
+ if (pool_.get_state(idx) == SlotState::EXPIRED) {
473
+ pool_.set_state(idx, SlotState::EMPTY);
474
+ }
475
+ }
476
+
477
  return total_reward;
478
  }
479
 
inference.py CHANGED
@@ -1,10 +1,19 @@
1
  #!/usr/bin/env python3
 
 
 
 
 
 
 
 
 
 
2
 
3
  import os
4
  import sys
5
  import json
6
  import re
7
- import datetime
8
  import traceback
9
  import time
10
  from typing import List
@@ -27,6 +36,10 @@ except ImportError:
27
 
28
  from models import AuditorAction
29
 
 
 
 
 
30
  class LLMResponse(BaseModel):
31
  reasoning: str
32
  decisions: List[int]
@@ -39,6 +52,7 @@ if not HF_TOKEN:
39
  raise ValueError("CRITICAL: HF_TOKEN environment variable is missing.")
40
 
41
  TASK_ID: str = os.getenv("TASK_ID", "anomaly_detection_hard")
 
42
 
43
  # FIX: Sync the inference max_steps default with the active task
44
  if "easy" in TASK_ID.lower():
@@ -72,9 +86,6 @@ Example:
72
  {"reasoning": "Trade 1 has high risk. Trade 2 is safe.", "decisions": [1, 0, 1]}
73
  """
74
 
75
- def _ts() -> str:
76
- return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
77
-
78
  def _build_user_prompt(step: int, features: list[list[float]]) -> str:
79
  lines = [
80
  f"Step {step}: You have {len(features)} flagged trades to audit.",
@@ -154,8 +165,10 @@ def _call_llm(step: int, features: list[list[float]]) -> list[int]:
154
  content = response.choices[0].message.content or ""
155
  return _parse_llm_decisions(content, len(features))
156
  except Exception as e:
 
157
  time.sleep(1)
158
 
 
159
  fallback_decisions = []
160
  for row in features:
161
  if len(row) >= 4:
@@ -166,72 +179,59 @@ def _call_llm(step: int, features: list[list[float]]) -> list[int]:
166
  return fallback_decisions
167
 
168
  def run_inference() -> None:
169
- episode_id: str = "unknown"
170
- total_reward: float = 0.0
171
  steps_completed: int = 0
172
- status: str = "SUCCESS"
 
 
 
 
 
173
 
174
  try:
175
  env = FinAuditorEnvironment()
176
  obs = env.reset()
177
- episode_id = getattr(env.state, 'episode_id', "test_run")
178
 
179
- start_payload = {
180
- "episode_id": episode_id,
181
- "model": MODEL_NAME,
182
- "difficulty": TASK_ID,
183
- "max_steps": MAX_STEPS
184
- }
185
- print(f"[START] {json.dumps(start_payload)}", flush=True)
186
 
187
  for step_num in range(1, MAX_STEPS + 1):
188
- step_reward = 0.0
189
  features = obs.features
190
 
191
  if not features:
192
  action = AuditorAction(decisions=[])
193
- _last_reasoning = "Empty matrix."
194
  else:
195
  decisions = _call_llm(step_num, features)
196
  action = AuditorAction(decisions=decisions)
197
 
198
  obs = env.step(action)
199
  step_reward = obs.reward if obs.reward is not None else 0.0
200
- total_reward += step_reward
201
  steps_completed = step_num
202
 
203
- # FIX: Ensure fractional precision is retained for validation
204
- step_payload = {
205
- "step": step_num,
206
- "anomalies": len(features),
207
- "reward": round(float(step_reward), 4),
208
- "cumulative_reward": round(float(total_reward), 4),
209
- "done": bool(obs.done),
210
- "error": None,
211
- "reasoning": _last_reasoning[:120].replace('\n', ' ') + "...",
212
- "tp": getattr(env.state, 'last_tp', 0),
213
- "tn": getattr(env.state, 'last_tn', 0),
214
- "fp": getattr(env.state, 'last_fp', 0),
215
- "fn": getattr(env.state, 'last_fn', 0)
216
- }
217
- print(f"[STEP] {json.dumps(step_payload)}", flush=True)
218
 
219
  if obs.done:
220
  break
221
 
 
 
222
  except KeyboardInterrupt:
223
- status = "INTERRUPTED"
 
224
  except Exception as exc:
225
- status = "ERROR"
226
- traceback.print_exc(file=sys.stderr)
227
-
228
- avg_reward = total_reward / max(steps_completed, 1)
229
- end_payload = {
230
- "total_reward": round(float(total_reward), 4),
231
- "avg_reward": round(float(avg_reward), 4),
232
- "status": status
233
- }
234
- print(f"[END] {json.dumps(end_payload)}", flush=True)
235
 
236
  if __name__ == "__main__":
237
  run_inference()
 
1
  #!/usr/bin/env python3
2
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
3
+ # inference.py β€” OpenEnv Evaluation Script (Hackathon Submission)
4
+ #
5
+ # STDOUT FORMAT (strict regex compliance):
6
+ # [START] task=<task_name> env=<benchmark> model=<model_name>
7
+ # [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
8
+ # [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
9
+ #
10
+ # ALL debug output goes to stderr. NO JSON on stdout. NO extra whitespace.
11
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
12
 
13
  import os
14
  import sys
15
  import json
16
  import re
 
17
  import traceback
18
  import time
19
  from typing import List
 
36
 
37
  from models import AuditorAction
38
 
39
+ # ── Debug logger: ONLY to stderr ─────────────────────────────────────────────
40
+ def _dbg(msg: str) -> None:
41
+ print(msg, file=sys.stderr, flush=True)
42
+
43
  class LLMResponse(BaseModel):
44
  reasoning: str
45
  decisions: List[int]
 
52
  raise ValueError("CRITICAL: HF_TOKEN environment variable is missing.")
53
 
54
  TASK_ID: str = os.getenv("TASK_ID", "anomaly_detection_hard")
55
+ ENV_NAME: str = "fin_auditor"
56
 
57
  # FIX: Sync the inference max_steps default with the active task
58
  if "easy" in TASK_ID.lower():
 
86
  {"reasoning": "Trade 1 has high risk. Trade 2 is safe.", "decisions": [1, 0, 1]}
87
  """
88
 
 
 
 
89
  def _build_user_prompt(step: int, features: list[list[float]]) -> str:
90
  lines = [
91
  f"Step {step}: You have {len(features)} flagged trades to audit.",
 
165
  content = response.choices[0].message.content or ""
166
  return _parse_llm_decisions(content, len(features))
167
  except Exception as e:
168
+ _dbg(f"[LLM RETRY {attempt+1}/{max_retries}] {e}")
169
  time.sleep(1)
170
 
171
+ _dbg("[LLM] All retries exhausted, using risk_score fallback")
172
  fallback_decisions = []
173
  for row in features:
174
  if len(row) >= 4:
 
179
  return fallback_decisions
180
 
181
  def run_inference() -> None:
 
 
182
  steps_completed: int = 0
183
+ all_rewards: list[float] = []
184
+ success: bool = False
185
+ error_msg: str | None = None
186
+
187
+ # ── [START] β€” always emitted ──────────────────────────────────────────
188
+ print(f"[START] task={TASK_ID} env={ENV_NAME} model={MODEL_NAME}", flush=True)
189
 
190
  try:
191
  env = FinAuditorEnvironment()
192
  obs = env.reset()
 
193
 
194
+ _dbg(f"[DBG] Episode started. Features: {len(obs.features)} rows, difficulty: {TASK_ID}")
 
 
 
 
 
 
195
 
196
  for step_num in range(1, MAX_STEPS + 1):
197
+ step_reward = 0.0
198
  features = obs.features
199
 
200
  if not features:
201
  action = AuditorAction(decisions=[])
202
+ _dbg(f"[DBG] Step {step_num}: Empty feature matrix")
203
  else:
204
  decisions = _call_llm(step_num, features)
205
  action = AuditorAction(decisions=decisions)
206
 
207
  obs = env.step(action)
208
  step_reward = obs.reward if obs.reward is not None else 0.0
209
+ all_rewards.append(step_reward)
210
  steps_completed = step_num
211
 
212
+ # ── [STEP] β€” plain text, 2 decimal places, lowercase bools ────
213
+ action_str = ",".join(str(d) for d in action.decisions) if action.decisions else "none"
214
+ done_str = "true" if obs.done else "false"
215
+ print(f"[STEP] step={step_num} action={action_str} reward={step_reward:.2f} done={done_str} error=null", flush=True)
216
+
217
+ _dbg(f"[DBG] Step {step_num}: reward={step_reward:.4f}, features={len(obs.features)}, done={obs.done}")
 
 
 
 
 
 
 
 
 
218
 
219
  if obs.done:
220
  break
221
 
222
+ success = True
223
+
224
  except KeyboardInterrupt:
225
+ error_msg = "interrupted"
226
+ _dbg("[DBG] Interrupted by user")
227
  except Exception as exc:
228
+ error_msg = str(exc).replace("\n", " ")[:80]
229
+ _dbg(f"[ERROR] {traceback.format_exc()}")
230
+ finally:
231
+ # ── [END] β€” ALWAYS emitted, even on crash ────────────────────────
232
+ success_str = "true" if success else "false"
233
+ rewards_str = ",".join(f"{r:.2f}" for r in all_rewards) if all_rewards else "0.00"
234
+ print(f"[END] success={success_str} steps={steps_completed} rewards={rewards_str}", flush=True)
 
 
 
235
 
236
  if __name__ == "__main__":
237
  run_inference()
server/app.py CHANGED
@@ -66,6 +66,35 @@ else:
66
 
67
  app_metrics = {"last_step_latency_us": 0.0}
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  @app.middleware("http")
70
  async def capture_step_latency(request: Request, call_next):
71
  if request.url.path == "/step":
@@ -711,7 +740,14 @@ async def root_dashboard():
711
  // FIX: Robust payload extraction handling regardless of OpenEnv wrapper depth
712
  const reward = data.reward ?? data.observation?.reward ?? data.info?.reward ?? 0.0;
713
  const done = data.done ?? data.observation?.done ?? data.info?.done ?? false;
714
- const step = data.step_count ?? data.observation?.step_count ?? data.info?.step_count ?? data.observation?.metadata?.step_count ?? 'N/A';
 
 
 
 
 
 
 
715
 
716
  logMsg(`[RECON] Reward: ${reward.toFixed(4)} | Success`, reward >= 0.8 ? 'success' : 'warn');
717
 
@@ -732,10 +768,13 @@ async def root_dashboard():
732
  }
733
  }
734
 
735
- // FIX: Auto-Reset the environment on boot so it actually has data to process
 
736
  window.addEventListener('DOMContentLoaded', async () => {
737
  logMsg("Auto-initializing environment engine...", "info");
738
  await executeReset();
 
 
739
  setInterval(updateState, 1000);
740
  updateState();
741
  });
 
66
 
67
  app_metrics = {"last_step_latency_us": 0.0}
68
 
69
+ # ── Auto-bootstrap on startup ────────────────────────────────────────────────
70
+ @app.on_event("startup")
71
+ async def auto_bootstrap():
72
+ """Auto-authenticate with HF_TOKEN and initialize engine on boot."""
73
+ token = os.getenv("HF_TOKEN", "")
74
+ if token:
75
+ try:
76
+ from openai import AsyncOpenAI
77
+ client = AsyncOpenAI(base_url="https://router.huggingface.co/v1", api_key=token, max_retries=2)
78
+ try:
79
+ response = await client.models.list()
80
+ model_list = [m.id for m in response.data]
81
+ except Exception:
82
+ model_list = ["meta-llama/Meta-Llama-3-8B-Instruct"]
83
+
84
+ llm_session["api_key"] = token
85
+ llm_session["base_url"] = "https://router.huggingface.co/v1"
86
+ llm_session["available_models"] = model_list
87
+ if model_list:
88
+ llm_session["model_name"] = model_list[0]
89
+ system_health["key_validated"] = True
90
+ system_health["model_detected"] = len(model_list) > 0
91
+ system_health["connected"] = True
92
+ print(f"[BOOT] Auto-authenticated with HF_TOKEN. {len(model_list)} models discovered.")
93
+ except Exception as e:
94
+ print(f"[BOOT] Auto-auth failed: {e}")
95
+ else:
96
+ print("[BOOT] No HF_TOKEN found. Manual authentication required.")
97
+
98
  @app.middleware("http")
99
  async def capture_step_latency(request: Request, call_next):
100
  if request.url.path == "/step":
 
740
  // FIX: Robust payload extraction handling regardless of OpenEnv wrapper depth
741
  const reward = data.reward ?? data.observation?.reward ?? data.info?.reward ?? 0.0;
742
  const done = data.done ?? data.observation?.done ?? data.info?.done ?? false;
743
+
744
+ // Fetch the authoritative step count from /state
745
+ let step = 'N/A';
746
+ try {
747
+ const stateRes = await fetch('/state');
748
+ const stateData = await stateRes.json();
749
+ step = stateData.step_count ?? 'N/A';
750
+ } catch(se) {} // Swallow β€” non-critical
751
 
752
  logMsg(`[RECON] Reward: ${reward.toFixed(4)} | Success`, reward >= 0.8 ? 'success' : 'warn');
753
 
 
768
  }
769
  }
770
 
771
+ // Auto-Reset the environment on boot so it actually has data to process,
772
+ // then try to authenticate with the default HF_TOKEN
773
  window.addEventListener('DOMContentLoaded', async () => {
774
  logMsg("Auto-initializing environment engine...", "info");
775
  await executeReset();
776
+ logMsg("Attempting default token auth...", "info");
777
+ await useDefault();
778
  setInterval(updateState, 1000);
779
  updateState();
780
  });
server/fin_auditor_environment.py CHANGED
@@ -98,14 +98,24 @@ class FinAuditorEnvironment(Environment):
98
 
99
  def reset(self) -> AuditorObservation:
100
  self._state = State(episode_id=str(uuid4()), step_count=0)
101
-
102
- # We intentionally return an empty matrix on reset.
103
- self.sim_time_ns += self._DELTA_MAX_NS
 
 
 
 
 
 
 
104
  self.engine.tick(self.sim_time_ns)
105
 
 
 
 
106
  return FinAuditorObservation(
107
- features=[],
108
- message="Fin Auditor engine ready.",
109
  reward=0.001 / self._MAX_EPISODE_STEPS, # Safe fractional minimum
110
  done=False
111
  )
 
98
 
99
  def reset(self) -> AuditorObservation:
100
  self._state = State(episode_id=str(uuid4()), step_count=0)
101
+
102
+ # Re-initialize the engine for a clean episode
103
+ self.engine = hft_auditor.ReconciliationEngine(self._RING_BUFFER_CAPACITY)
104
+ self.sim_time_ns = 0
105
+
106
+ # Generate the first batch so step 1 has data to evaluate
107
+ self.engine.generate_batch(self.difficulty, self._INGEST_CHUNK_SIZE, self.sim_time_ns)
108
+
109
+ # Advance time past Ξ”_max to expire the batch
110
+ self.sim_time_ns += 6_000_000_000
111
  self.engine.tick(self.sim_time_ns)
112
 
113
+ # Get the anomaly matrix for the agent (features for step 1)
114
+ anomalies: list[list[float]] = self.engine.get_anomaly_matrix().tolist()
115
+
116
  return FinAuditorObservation(
117
+ features=anomalies,
118
+ message=f"Engine ready. {len(anomalies)} trades awaiting audit.",
119
  reward=0.001 / self._MAX_EPISODE_STEPS, # Safe fractional minimum
120
  done=False
121
  )