File size: 1,627 Bytes
b25b8f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""

Quick example script to test the verification system

"""

from core import run_verification_parallel

# Sample word problem plus a worked, step-by-step solution to verify.
# The final step contains an arithmetic error: 5 - 1 should be 4, not 6.
problem = "Janet has 3 apples. She buys 2 more. She gives 1 away. How many?"
steps = [
    "Janet starts with 3 apples",
    "She buys 2 more: 3 + 2 = 5 apples",
    "She gives 1 away: 5 - 1 = 6 apples",  # wrong result, see note above
]

# Echo the inputs, then print a 50-character rule framed by blank lines.
print("Running verification...")
print("Problem: " + problem)
print("Steps: " + str(steps))
print("\n{}\n".format("=" * 50))

# Submit the problem and its solution steps for verification.
# NOTE(review): "GPT-4" is passed as model_name and is also the first entry
# of model_list; how the two arguments relate is decided inside
# core.run_verification_parallel, which is not visible here -- confirm.
result = run_verification_parallel(
    problem=problem,
    steps=steps,
    model_name="GPT-4",
    model_list=["GPT-4", "Llama 2", "Gemini"],
)

# Summary section: final verdict, confidence scaled to a percentage with one
# decimal, agreement type, and processing time with two decimals and an "s"
# suffix, followed by a 50-character rule framed by blank lines.
consensus = result["consensus"]
print("Final Verdict: {}".format(consensus["final_verdict"]))
print("Confidence: {:.1f}%".format(consensus["overall_confidence"] * 100))
print("Agreement: {}".format(consensus["agreement_type"]))
print("Processing Time: {:.2f}s".format(result["processing_time"]))
print("\n{}\n".format("=" * 50))

# Display errors
# For every classified error, print its step number, category, the value
# that was found and the corrected value, plus the explanation stored under
# that step number when there is one. Fields missing from an error record
# are shown as "N/A".
classified_errors = result.get("classified_errors", [])
# Fetched once instead of on every loop iteration; `or {}` also covers an
# "explanations" entry that is present but set to None, which would make the
# membership test below raise TypeError.
explanations = result.get("explanations") or {}

if classified_errors:
    print(f"Found {len(classified_errors)} error(s):\n")
    for error in classified_errors:
        # Read the step number a single time so the heading and the
        # explanation lookup always use the same value.
        step_num = error.get("step_number", "N/A")
        print(f"Step {step_num}: {error.get('category', 'N/A')}")
        print(f"  Found: {error.get('found', 'N/A')}")
        print(f"  Correct: {error.get('correct', 'N/A')}")

        # NOTE(review): assumes explanations is keyed by the same
        # step_number values stored in the error records -- confirm
        # against core.run_verification_parallel.
        if step_num in explanations:
            print(f"  Explanation: {explanations[step_num]}")
        print()
else:
    print("No errors found! Solution is valid.")