TheRealAIGuy committed on
Commit
7435d89
·
1 Parent(s): e6c33aa

New Regex Implemented in final_check.py (Also improved inference)

Browse files
Files changed (2) hide show
  1. final_check.py +2 -2
  2. inference.py +2 -2
final_check.py CHANGED
@@ -144,7 +144,7 @@ class FinalIntegrityCheck(unittest.TestCase):
144
  self.assertTrue(len(step_lines) >= 1, "No STEP lines found")
145
  for sl in step_lines:
146
  step_match = re.match(
147
- r'^\[STEP\] step=\d+ action=\S+ reward=\d+\.\d{2} done=(true|false) error=\S+$',
148
  sl
149
  )
150
  self.assertIsNotNone(step_match, f"STEP line doesn't match regex: {sl}")
@@ -152,7 +152,7 @@ class FinalIntegrityCheck(unittest.TestCase):
152
  # Verify END tag format
153
  end_line = lines[-1]
154
  end_match = re.match(
155
- r'^\[END\] success=(true|false) steps=\d+ score=\d+\.\d{3} rewards=[\d.,]+$',
156
  end_line
157
  )
158
  self.assertIsNotNone(end_match, f"END line doesn't match regex: {end_line}")
 
144
  self.assertTrue(len(step_lines) >= 1, "No STEP lines found")
145
  for sl in step_lines:
146
  step_match = re.match(
147
+ r'^\[STEP\] step=\d+ action=.*? reward=-?\d+\.\d{2} done=(true|false) error=.*$',
148
  sl
149
  )
150
  self.assertIsNotNone(step_match, f"STEP line doesn't match regex: {sl}")
 
152
  # Verify END tag format
153
  end_line = lines[-1]
154
  end_match = re.match(
155
+ r'^\[END\] success=(true|false) steps=\d+ score=-?\d+\.\d+ rewards=(?:-?\d+\.\d{2}(?:,-?\d+\.\d{2})*)?$',
156
  end_line
157
  )
158
  self.assertIsNotNone(end_match, f"END line doesn't match regex: {end_line}")
inference.py CHANGED
@@ -115,7 +115,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
115
 
116
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
117
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
118
- print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
119
 
120
 
121
  def build_user_prompt(step: int, features: list[list[float]]) -> str:
@@ -294,7 +294,7 @@ def main() -> None:
294
  # Ensure absolutely no element is exactly 0.0 or 1.0 or outside the valid range.
295
  for i in range(len(rewards)):
296
  rewards[i] = float(max(0.01, min(0.99, rewards[i])))
297
-
298
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
299
 
300
 
 
115
 
116
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
117
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
118
+ print(f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True)
119
 
120
 
121
  def build_user_prompt(step: int, features: list[list[float]]) -> str:
 
294
  # Ensure absolutely no element is exactly 0.0 or 1.0 or outside the valid range.
295
  for i in range(len(rewards)):
296
  rewards[i] = float(max(0.01, min(0.99, rewards[i])))
297
+
298
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
299
 
300