Spaces:

Varshithdharmajv
/

mvm2-math-verification

Sleeping

App Files Files Community

Varshith dharmaj committed on Mar 12

Commit

1928bc2

verified ·

1 Parent(s): 9b4b665

Upload scripts/edge_case_tester.py with huggingface_hub

Browse files

Files changed (1) hide show

scripts/edge_case_tester.py +65 -0

scripts/edge_case_tester.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import sys
+import os
+import time
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+sys.path.insert(0, PROJECT_ROOT)
+from core import run_verification_parallel
+EDGE_CASES = [  # each case: a display name, a problem statement, and the reasoning steps passed to the verifier
+    {
+        "name": "Deliberate Self-Contradiction Paradox",
+        "problem": "Prove that 1 = 2 using standard arithmetic.",
+        "steps": [
+            "Let a = b",
+            "a^2 = ab",
+            "a^2 - b^2 = ab - b^2",
+            "(a-b)(a+b) = b(a-b)",
+            "a+b = b",  # invalid step: cancels (a-b), which is 0 because a = b
+            "Since a=b, 2b = b",
+            "2 = 1"
+        ]
+    },
+    {
+        "name": "Calculus Ambiguity (Division by Zero limit)",
+        "problem": "Evaluate the limit of 1/x as x approaches 0.",
+        "steps": [
+            "We want to find the limit of 1/x as x goes to 0.",
+            "Plug in 0 for x.",  # invalid step: 1/x is undefined at x = 0, so direct substitution does not apply
+            "1 / 0 is infinity.",  # invalid step: 1/0 is undefined; the one-sided limits of 1/x differ in sign
+            "Therefore the limit is infinity."
+        ]
+    }
+]
+def run_tests():
+    for case in EDGE_CASES:
+        print(f"\n======================================")
+        print(f"🧪 Running Edge Case: {case['name']}")
+        print(f"Problem: {case['problem']}")
+        print(f"======================================")
+        start = time.time()
+        for partial_res in run_verification_parallel(case['problem'], case['steps']):
+            # Stream the results logic to terminal
+            if partial_res.get("type") == "partial":
+                agent = partial_res["agent_name"]
+                ans = partial_res["agent_result"]["final_answer"]
+                print(f" [STREAM] {agent} finished analyzing. Conclusion: {ans}")
+            elif partial_res.get("type") == "final":
+                print("\n--- CONSENSUS RESULT ---")
+                print(f"Verdict: {partial_res['consensus'].get('final_verdict')}")
+                print(f"Confidence: {partial_res['consensus'].get('overall_confidence', 0)*100:.1f}%")
+                errors = partial_res.get('consensus', {}).get('classified_errors', [])
+                if errors:
+                    print("Errors Caught:")
+                    for err in errors:
+                        print(f" - Step {err.get('step_number')}: {err.get('category')} (Found: {err.get('found')})")
+                else:
+                    print("No explicit errors caught.")
+        print(f"   => Edge case resolved in {time.time() - start:.2f}s\n")
+if __name__ == "__main__":  # run the edge cases only when executed as a script, not on import
+    run_tests()