Flamehaven committed on
Commit
afd145f
·
verified ·
1 Parent(s): 84fbdb9

Upload test_demo.py

Browse files
Files changed (1) hide show
  1. test_demo.py +222 -0
test_demo.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ProofCore Demo - Quick Test Script
3
+ Tests core functionality without requiring Gradio server
4
+ """
5
+
6
+ import sys
7
+ import time
8
+ from app import ProofCoreDemo, EXAMPLE_PROOFS, ProofStep
9
+
10
+
11
def test_verifier_initialization():
    """Test 1: a freshly constructed demo starts in a clean state.

    Asserts that a new ``ProofCoreDemo`` has a verifier attached, has no
    proof loaded, and that its results compare equal to an empty list.
    """
    print("[>] Test 1: Verifier Initialization...")

    fresh = ProofCoreDemo()
    assert fresh.verifier is not None
    assert fresh.current_proof is None
    assert fresh.results == []

    print("[+] Verifier initialized successfully\n")
19
+
20
+
21
def test_load_examples():
    """Test 2: every bundled example proof can be loaded.

    For each name in ``EXAMPLE_PROOFS`` the demo must report "Loaded" in
    the returned info text and expose at least one step on
    ``current_proof``.
    """
    print("[>] Test 2: Loading Example Proofs...")
    demo = ProofCoreDemo()

    for index, name in enumerate(EXAMPLE_PROOFS, start=1):
        # The second element of the returned pair is not inspected here.
        info_text, _ = demo.load_example_proof(name)
        assert "Loaded" in info_text

        step_count = len(demo.current_proof["steps"])
        assert step_count > 0
        print(f" [{index}] {name}: {len(demo.current_proof['steps'])} steps")

    total = len(EXAMPLE_PROOFS)
    print(f"[+] All {total} examples loaded successfully\n")
33
+
34
+
35
def test_verification():
    """Test 3: every bundled example proof can be verified.

    Loads each example in turn, runs verification, and checks that the
    returned results text contains the results header, a validity tag and
    the symbolic score label.
    """
    print("[>] Test 3: Verifying Example Proofs...")
    demo = ProofCoreDemo()
    validity_tags = ("[VALID]", "[INVALID]")

    for index, name in enumerate(EXAMPLE_PROOFS, start=1):
        demo.load_example_proof(name)
        # Only the first element of the returned triple is checked.
        report, _summary, _metrics = demo.verify_current_proof()

        # The report must carry the verification data.
        assert "VERIFICATION RESULTS" in report
        assert any(tag in report for tag in validity_tags)
        assert "Symbolic Score" in report

        verified = len(demo.results)
        print(f" [{index}] {name}: {verified} steps verified")

    print("[+] All proofs verified successfully\n")
52
+
53
+
54
def test_custom_steps():
    """Test 4: user-supplied steps produce a complete verification report.

    Submits a handful of (claim, equation, reasoning, domain) tuples via
    ``create_custom_step`` and checks that each returned text contains the
    report header and both score labels.
    """
    print("[>] Test 4: Custom Step Verification...")
    demo = ProofCoreDemo()

    # Each case is (claim, equation, reasoning, domain).
    cases = (
        ("2+2=4", "2 + 2 = 4", "Simple addition", "algebra"),
        ("x²=4 when x=2", "x^2 = 4", "Algebraic equation", "algebra"),
        ("All angles sum to 180", "A + B + C = 180", "Triangle angle sum", "geometry"),
        ("If P then Q", "P implies Q", "Logical implication", "logic"),
    )
    required_sections = (
        "Custom Step Verification",
        "Symbolic Score",
        "Heuristic Score",
    )

    for index, case in enumerate(cases, start=1):
        claim = case[0]
        report = demo.create_custom_step(*case)
        for section in required_sections:
            assert section in report
        print(f" [{index}] {claim}: Verified")

    print("[+] All custom steps verified successfully\n")
74
+
75
+
76
def test_performance():
    """Test 5: the verifier's metrics report the expected values.

    After verifying one example proof, checks that the metrics show zero
    network calls, the expected offline and storage labels, and a
    non-negative average verification time.
    """
    print("[>] Test 5: Performance Metrics...")
    demo = ProofCoreDemo()

    # Verify one proof so the metrics have something to report.
    demo.load_example_proof("Algebra: Difference of Squares")
    demo.verify_current_proof()

    stats = demo.verifier.get_metrics()

    # Metrics that must match an exact value, checked in this order.
    exact_expectations = (
        ("network_calls", 0),
        ("offline_status", "100% Verified"),
        ("data_stored", "Local only"),
    )
    for key, expected in exact_expectations:
        assert stats[key] == expected
    assert stats["avg_verification_time_ms"] >= 0

    print(f" Network calls: {stats['network_calls']}")
    print(f" Offline status: {stats['offline_status']}")
    print(f" Avg verification time: {stats['avg_verification_time_ms']}ms")
    print(f" Proofs verified: {stats['proofs_verified']}")
    print("[+] Performance metrics verified\n")
98
+
99
+
100
def test_scoring():
    """Test 6: each scoring helper returns a value within [0, 1].

    Builds one algebra ``ProofStep`` and passes it through the verifier's
    symbolic, heuristic and consensus scoring helpers, asserting the range
    of each result.
    """
    print("[>] Test 6: Scoring Functions...")
    verifier = ProofCoreDemo().verifier

    sample_step = ProofStep(
        id=1,
        claim="If x = 2, then x² = 4",
        equation="x^2 = 4",
        reasoning="By substitution and algebraic verification",
        domain="algebra",
    )

    # Symbolic verification score.
    symbolic_score = verifier._symbolic_verify(sample_step)
    assert 0 <= symbolic_score <= 1.0
    print(f" Symbolic score: {symbolic_score:.1%}")

    # Heuristic evaluation score.
    heuristic_score = verifier._heuristic_evaluate(sample_step)
    assert 0 <= heuristic_score <= 1.0
    print(f" Heuristic score: {heuristic_score:.1%}")

    # Consensus is computed from the two scores above.
    consensus_score = verifier._consensus_score(symbolic_score, heuristic_score)
    assert 0 <= consensus_score <= 1.0
    print(f" Consensus score: {consensus_score:.1%}")

    print("[+] Scoring functions validated\n")
129
+
130
+
131
def test_offline_guarantee():
    """Test 7: the verifier's metrics report zero network calls.

    Verifies one example proof, then asserts on the ``network_calls`` and
    ``offline_status`` entries of the verifier's metrics. The check relies
    on what the metrics report; it does not monitor network traffic itself.
    """
    print("[>] Test 7: Offline Guarantee...")

    demo = ProofCoreDemo()
    demo.load_example_proof("Algebra: Quadratic Formula")
    demo.verify_current_proof()

    stats = demo.verifier.get_metrics()
    network_calls = stats["network_calls"]

    assert network_calls == 0, "Network calls should be 0!"
    assert stats["offline_status"] == "100% Verified", "Must be offline-safe!"

    print(f" Network calls: {network_calls} [VERIFIED]")
    print(" Offline operation: Confirmed [VERIFIED]")
    print(f" Data storage: {stats['data_stored']} [VERIFIED]")
    print("[+] 100% offline operation confirmed\n")
150
+
151
+
152
def test_performance_targets():
    """Test 8: report the per-step verification time against the target.

    Times one full verification of an example proof and prints the average
    time per step next to the per-step target. A slow run is reported as
    ``[-] SLOW`` but does not fail the test.

    Raises:
        AssertionError: if verification produced no step results, since an
            average per step cannot be computed in that case.
    """
    print("[>] Test 8: Performance Targets...")
    demo = ProofCoreDemo()

    target_times = {
        "Symbolic": 150,  # ms
        "Heuristic": 100,  # ms
        "Per-step average": 200,  # ms
    }
    per_step_target = target_times["Per-step average"]

    print(f" Target verification time (per step): <{per_step_target}ms")

    # Time the run with perf_counter: it is monotonic and high-resolution,
    # whereas time.time() can jump with system clock adjustments.
    demo.load_example_proof("Algebra: Difference of Squares")
    start = time.perf_counter()
    demo.verify_current_proof()
    elapsed_ms = (time.perf_counter() - start) * 1000

    # Fail with a clear message instead of an opaque ZeroDivisionError.
    step_count = len(demo.results)
    assert step_count > 0, "Verification produced no step results to time"
    avg_per_step = elapsed_ms / step_count

    print(f" Actual time: {avg_per_step:.1f}ms per step")
    print(f" Status: {'[+] PASS' if avg_per_step < per_step_target else '[-] SLOW'}")
    print("[+] Performance targets validated\n")
176
+
177
+
178
def run_all_tests(tests=None):
    """Run the demo test suite and return a process exit code.

    Each test is called with no arguments. A test passes when it returns
    without raising. Failures are reported with the name of the test, so a
    bare ``assert`` (which has an empty message) can still be traced.

    Args:
        tests: Optional iterable of zero-argument callables to run. When
            omitted, the eight ``test_*`` functions of this script are run
            in their original order.

    Returns:
        0 if no test raised, 1 otherwise.
    """
    if tests is None:
        tests = [
            test_verifier_initialization,
            test_load_examples,
            test_verification,
            test_custom_steps,
            test_performance,
            test_scoring,
            test_offline_guarantee,
            test_performance_targets,
        ]

    print("=" * 70)
    print("[*] ProofCore v1.0.2 Demo - Test Suite")
    print("=" * 70 + "\n")

    passed = 0
    failed = 0

    for test in tests:
        # Not every callable has __name__ (e.g. functools.partial objects).
        name = getattr(test, "__name__", repr(test))
        try:
            test()
        except AssertionError as e:
            print(f"[-] Test failed: {name}: {e}\n")
            failed += 1
        except Exception as e:
            # Top-level runner boundary: report any error and keep going so
            # one broken test does not hide the results of the others.
            print(f"[-] Test error: {name}: {type(e).__name__}: {e}\n")
            failed += 1
        else:
            passed += 1

    print("=" * 70)
    print(f"[*] Test Results: {passed} passed, {failed} failed")
    print("=" * 70)

    if failed == 0:
        print("[+] All tests passed! Demo is ready for deployment.")
        return 0

    print(f"[-] {failed} test(s) failed. Please review.")
    return 1
219
+
220
+
221
if __name__ == "__main__":
    # Use the suite's return value (0 or 1) as the process exit status.
    raise SystemExit(run_all_tests())