mvm2-math-verification / system_metrics.json
Varshith dharmaj
Upload system_metrics.json with huggingface_hub
fe6cdd3 verified
{
"system_info": {
"version": "1.0.0",
"codename": "MVM2-NeuroSymbolic",
"last_updated": "2026-03-12"
},
"performance_metrics": [
{"metric": "Overall Accuracy", "mvm2_score": 92.7, "target": 90.0, "baseline_gpt4": 72.0},
{"metric": "OCR-Robust Accuracy", "mvm2_score": 84.6, "target": 80.0, "baseline_gpt4": 41.2},
{"metric": "Reasoning Step Validity", "mvm2_score": 89.4, "target": 85.0, "baseline_gpt4": 65.4},
{"metric": "Hallucination Rate", "mvm2_score": 4.2, "target": 5.0, "baseline_gpt4": 18.7},
{"metric": "System Confidence", "mvm2_score": 88.0, "target": 85.0, "baseline_gpt4": 71.0}
],
"latency_breakdown": [
{"layer": "OCR Extraction", "latency_sec": 1.4, "api_baseline": 3.5},
{"layer": "Symbolic Verifier", "latency_sec": 0.5, "api_baseline": 1.2},
{"layer": "Multi-Agent Logic", "latency_sec": 2.8, "api_baseline": 6.4},
{"layer": "Consensus Fusion", "latency_sec": 0.2, "api_baseline": 0.5}
],
"error_profile": {
"labels": ["Correct", "Calculation Slip", "Logic Gap", "OCR Blur"],
"values": [92.7, 3.1, 2.2, 2.0]
}
}