Varshith dharmaj committed on
Commit
1928bc2
·
verified ·
1 Parent(s): 9b4b665

Upload scripts/edge_case_tester.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/edge_case_tester.py +65 -0
scripts/edge_case_tester.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import time
4
+
5
# Absolute, normalized path of the directory one level above the folder that
# contains this script.  It is placed at the front of sys.path so that modules
# located there can be imported regardless of the current working directory.
# NOTE(review): presumably this is where the `core` module lives -- confirm.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
sys.path.insert(0, PROJECT_ROOT)
7
+
8
+ from core import run_verification_parallel
9
+
10
# Hand-written problems whose step lists contain a deliberate flaw.  Each entry
# is a dict with:
#   "name"    - label printed in the banner for the case,
#   "problem" - the problem statement passed to the verifier,
#   "steps"   - the ordered solution steps passed to the verifier.
EDGE_CASES = [
    {
        "name": "Deliberate Self-Contradiction Paradox",
        "problem": "Prove that 1 = 2 using standard arithmetic.",
        "steps": [
            "Let a = b",
            "a^2 = ab",
            "a^2 - b^2 = ab - b^2",
            "(a-b)(a+b) = b(a-b)",
            # Cancelling (a-b) divides by zero because a = b.
            "a+b = b",
            "Since a=b, 2b = b",
            "2 = 1",
        ],
    },
    {
        "name": "Calculus Ambiguity (Division by Zero limit)",
        "problem": "Evaluate the limit of 1/x as x approaches 0.",
        "steps": [
            "We want to find the limit of 1/x as x goes to 0.",
            "Plug in 0 for x.",
            "1 / 0 is infinity.",
            "Therefore the limit is infinity.",
        ],
    },
]
35
+
36
def run_tests(cases=None, *, verifier=None):
    """Run edge cases through the verifier and print the streamed results.

    For every case a banner with its name and problem is printed, then each
    event yielded by the verifier is reported as it arrives:

    * ``"type" == "partial"``: one line with the agent name and the
      ``final_answer`` of its ``agent_result``.
    * ``"type" == "final"``: the consensus verdict, the confidence as a
      percentage, and the classified errors (if any).

    Events of any other type are ignored.  After the verifier is exhausted
    the elapsed wall time for the case is printed.

    Args:
        cases: Iterable of dicts with ``"name"``, ``"problem"`` and
            ``"steps"`` keys.  Defaults to the module-level ``EDGE_CASES``.
        verifier: Callable invoked as ``verifier(problem, steps)`` that
            returns an iterable of event dicts.  Defaults to
            ``run_verification_parallel``.

    Returns:
        None.  All output goes to stdout.
    """
    # Defaults are resolved at call time so the signature stays usable with
    # no arguments, exactly like the original zero-argument function.
    if cases is None:
        cases = EDGE_CASES
    if verifier is None:
        verifier = run_verification_parallel

    banner = "======================================"
    for case in cases:
        print(f"\n{banner}")
        print(f"🧪 Running Edge Case: {case['name']}")
        print(f"Problem: {case['problem']}")
        print(banner)

        # perf_counter is monotonic, so the reported duration cannot go
        # negative or jump if the system clock is adjusted mid-run.
        started = time.perf_counter()
        for event in verifier(case["problem"], case["steps"]):
            kind = event.get("type")
            if kind == "partial":
                agent = event.get("agent_name")
                answer = (event.get("agent_result") or {}).get("final_answer")
                print(f" [STREAM] {agent} finished analyzing. Conclusion: {answer}")
            elif kind == "final":
                # A final event without a consensus payload is reported with
                # empty values instead of raising KeyError.
                _report_consensus(event.get("consensus") or {})

        print(f" => Edge case resolved in {time.perf_counter() - started:.2f}s\n")


def _report_consensus(consensus):
    """Print the verdict, confidence and classified errors of one consensus dict."""
    print("\n--- CONSENSUS RESULT ---")
    print(f"Verdict: {consensus.get('final_verdict')}")
    # Treat a missing or None confidence as 0 so the formatting cannot fail.
    confidence = consensus.get("overall_confidence") or 0
    print(f"Confidence: {confidence * 100:.1f}%")

    errors = consensus.get("classified_errors") or []
    if not errors:
        print("No explicit errors caught.")
        return

    print("Errors Caught:")
    for err in errors:
        print(
            f" - Step {err.get('step_number')}: {err.get('category')} "
            f"(Found: {err.get('found')})"
        )
63
+
64
if __name__ == "__main__":
    # Run the edge cases only when executed as a script, not when imported.
    run_tests()