Spaces:

Varshithdharmajv
/

mvm2-math-verification

Running

mvm2-math-verification / scripts /edge_case_tester.py

Varshith dharmaj

Upload scripts/edge_case_tester.py with huggingface_hub

1928bc2 verified 17 days ago

2.48 kB

	import sys
	import os
	import time

	# Resolve the parent of this script's directory to an absolute path and put
	# it at the front of sys.path. This has to run before the `from core import`
	# line that follows; presumably `core` lives in that parent directory
	# (not visible from this file - confirm against the repo layout).
	PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
	sys.path.insert(0, PROJECT_ROOT)

	from core import run_verification_parallel

	# Hand-written problems whose "solutions" are intentionally flawed.
	# Every entry provides:
	#   name    - label printed in the banner by run_tests
	#   problem - problem statement passed to the verifier
	#   steps   - ordered solution steps passed to the verifier
	EDGE_CASES = [
	    # Classic 1 = 2 "proof": going from the factored line to "a+b = b"
	    # cancels (a-b), which is zero because a = b.
	    dict(
	        name="Deliberate Self-Contradiction Paradox",
	        problem="Prove that 1 = 2 using standard arithmetic.",
	        steps=[
	            "Let a = b",
	            "a^2 = ab",
	            "a^2 - b^2 = ab - b^2",
	            "(a-b)(a+b) = b(a-b)",
	            "a+b = b",
	            "Since a=b, 2b = b",
	            "2 = 1",
	        ],
	    ),
	    # Limit evaluated by direct substitution, treating 1/0 as infinity.
	    dict(
	        name="Calculus Ambiguity (Division by Zero limit)",
	        problem="Evaluate the limit of 1/x as x approaches 0.",
	        steps=[
	            "We want to find the limit of 1/x as x goes to 0.",
	            "Plug in 0 for x.",
	            "1 / 0 is infinity.",
	            "Therefore the limit is infinity.",
	        ],
	    ),
	]

	def _print_consensus(consensus):
	    """Print verdict, confidence and classified errors from one consensus dict."""
	    print("\n--- CONSENSUS RESULT ---")
	    print(f"Verdict: {consensus.get('final_verdict')}")
	    # `or 0` also covers an explicit None, which would break the multiplication.
	    confidence = consensus.get('overall_confidence') or 0
	    print(f"Confidence: {confidence * 100:.1f}%")

	    errors = consensus.get('classified_errors') or []
	    if not errors:
	        print("No explicit errors caught.")
	        return
	    print("Errors Caught:")
	    for err in errors:
	        print(f" - Step {err.get('step_number')}: {err.get('category')} (Found: {err.get('found')})")


	def run_tests(cases=None, verify=None):
	    """Run every edge case through the verifier and print the streamed results.

	    Args:
	        cases: Iterable of dicts with "name", "problem" and "steps" keys.
	            When omitted, the module-level EDGE_CASES list is used.
	        verify: Callable invoked as ``verify(problem, steps)`` that returns an
	            iterable of result dicts. When omitted, run_verification_parallel
	            is used.

	    Each result dict is dispatched on its "type" key: "partial" prints one
	    stream line built from "agent_name" and "agent_result"["final_answer"];
	    "final" prints the consensus summary; any other type is ignored. A
	    "final" message with a missing or empty "consensus" is reported with
	    default values instead of raising.

	    Raises:
	        KeyError: If a case lacks one of its keys, or a "partial" message
	            lacks "agent_name", "agent_result" or "final_answer".
	    """
	    # Defaults are looked up at call time so callers can inject a small case
	    # list or a stub verifier without touching the module globals.
	    if cases is None:
	        cases = EDGE_CASES
	    if verify is None:
	        verify = run_verification_parallel

	    banner = "======================================"
	    for case in cases:
	        print(f"\n{banner}")
	        print(f"🧪 Running Edge Case: {case['name']}")
	        print(f"Problem: {case['problem']}")
	        print(banner)

	        # perf_counter is monotonic, so the elapsed time cannot be skewed by
	        # system clock adjustments the way time.time() differences can.
	        start = time.perf_counter()
	        for partial_res in verify(case['problem'], case['steps']):
	            kind = partial_res.get("type")
	            if kind == "partial":
	                agent = partial_res["agent_name"]
	                ans = partial_res["agent_result"]["final_answer"]
	                print(f" [STREAM] {agent} finished analyzing. Conclusion: {ans}")
	            elif kind == "final":
	                _print_consensus(partial_res.get("consensus") or {})

	        print(f" => Edge case resolved in {time.perf_counter() - start:.2f}s\n")

	# Run the edge-case suite only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	run_tests()