Varshith dharmaj committed on
Upload consensus/consensus_mechanism.py with huggingface_hub
Browse files- consensus/consensus_mechanism.py +127 -0
consensus/consensus_mechanism.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sympy
|
| 2 |
+
from sympy.parsing.sympy_parser import parse_expr, standard_transformations, implicit_multiplication_application, convert_equals_signs
|
| 3 |
+
from typing import Dict, Any, List
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
def evaluate_sympy_algebra(steps: List[str]) -> float:
    """
    Symbolically check step-by-step transformations using SymPy.

    Every string step that contains ``=`` is treated as an equation: it is
    split on the first ``=``, both sides are parsed with SymPy, and the step
    counts as valid when ``simplify(left - right)`` reduces to zero.

    Args:
        steps: Reasoning steps, one free-form string per step. Entries that
            are not strings are ignored.

    Returns:
        ``0.0`` when ``steps`` is empty, ``0.5`` (neutral) when no step
        contains an equation, otherwise the fraction of equation steps whose
        two sides are symbolically equal. Steps that cannot be parsed count
        as invalid.
    """
    if not steps:
        return 0.0

    # Only string steps containing "=" are scored; anything else is skipped
    # instead of raising on a membership test against a non-string.
    equations = [step for step in steps if isinstance(step, str) and "=" in step]
    if not equations:
        return 0.5  # Neutral if no explicit math equations found

    transformations = standard_transformations + (
        implicit_multiplication_application,
        convert_equals_signs,
    )

    valid_count = 0
    for step in equations:
        # Drop only a leading "let" keyword. Removing "Let x" wholesale would
        # also delete the variable, and removing every "let" substring would
        # corrupt identifiers that merely contain those letters.
        clean_step = step.strip()
        if clean_step[:3].lower() == "let" and clean_step[3:4].isspace():
            clean_step = clean_step[4:]

        left, right = clean_step.split("=", 1)
        try:
            # NOTE(security): parse_expr is eval-based; the steps come from
            # agent output, so treat them as untrusted and sandbox upstream
            # if that matters for the deployment.
            left_expr = parse_expr(left, transformations=transformations)
            right_expr = parse_expr(right, transformations=transformations)
            # left - right simplifying to zero proves the equality.
            is_equal = sympy.simplify(left_expr - right_expr) == 0
        except Exception as exc:
            # Best-effort scoring: an unparsable step counts as invalid, but
            # the reason is recorded instead of being silently discarded.
            logger.debug("Could not verify step %r: %s", step, exc)
            continue

        if is_equal:
            valid_count += 1

    return valid_count / len(equations)
|
| 41 |
+
|
| 42 |
+
def compute_neurosymbolic_consensus(agent_results: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """
    Implements the MVM² Hybrid Verification System: SymPy + Divergence Matrix + Weighted Scoring.
    40% Symbolic check validity
    35% Logical Consistency (Simulated via Agent Confidence Trace)
    25% Classifier Signal (Divergence Matrix step alignment)

    Args:
        agent_results: Mapping of agent name to that agent's result dict. The
            keys read from each result are ``reasoning_trace`` (list of step
            strings), ``confidence_explanation`` and ``final_answer``; all
            are optional.

    Returns:
        A dict with the overall verdict (``"VALID"`` / ``"ERROR"``), the
        overall confidence, the chosen agent and its answer, hallucination
        alerts, per-agent divergence scores, the per-agent scoring breakdown,
        an always-empty ``all_errors`` list and per-agent verdicts. When
        ``agent_results`` is empty an ``"ERROR"`` verdict with empty
        breakdowns and ``chosen_agent=None`` is returned.
    """
    valid_threshold = 0.65  # weighted score above which an agent is "VALID"
    alignment_alert_threshold = 0.7  # step agreement below this raises an alert

    # Without agents there is nothing to rank; report an explicit ERROR
    # verdict rather than failing on max() over an empty sequence.
    if not agent_results:
        return {
            "final_verdict": "ERROR",
            "overall_confidence": 0.0,
            "chosen_answer": "ERROR",
            "chosen_agent": None,
            "hallucination_alerts": [],
            "divergence_scores": {},
            "agent_scoring_breakdown": {},
            "all_errors": [],  # Kept for API compatibility with legacy Dashboard
            "individual_verdicts": {},
        }

    # Normalise traces once; a missing or None trace is treated as empty.
    traces = {
        name: (data.get("reasoning_trace") or [])
        for name, data in agent_results.items()
    }

    # 1. Divergence Matrix (Step-Level Alignment)
    # Simple proxy for divergence: how far each agent's number of reasoning
    # steps deviates from the average across agents.
    divergence_scores = {}
    hallucination_alerts = []
    avg_length = sum(len(trace) for trace in traces.values()) / len(traces)

    for name, trace in traces.items():
        if avg_length == 0:
            # No agent produced any steps: alignment is unknown, stay neutral.
            divergence = 0.5
        else:
            deviation = min(1.0, abs(len(trace) - avg_length) / avg_length)
            divergence = 1.0 - deviation  # 1.0 = perfect alignment

        divergence_scores[name] = divergence

        if divergence < alignment_alert_threshold:
            hallucination_alerts.append(f"Alert: {name} fell below 0.7 step agreement (Score: {divergence:.2f}). Possible hallucination detected.")

    # 2. Extract Answers and Score Individual Agents
    final_agent_scores = {}

    for name, data in agent_results.items():
        # A. Symbolic Check (40%)
        symbolic_score = evaluate_sympy_algebra(traces[name])

        # B. Logical Consistency (35%)
        # Keyword heuristic over the agent's own confidence explanation.
        conf_text = str(data.get("confidence_explanation", "")).lower()
        flagged = any(word in conf_text for word in ("hallucination", "error", "guess"))
        logical_score = 0.3 if flagged else 0.95

        # C. Classifier Signal (25%)
        # Use divergence matrix step-alignment score
        clf_score = divergence_scores[name]

        # Calculate Domain-Informed Weighted Scoring
        weighted_score = (0.40 * symbolic_score) + (0.35 * logical_score) + (0.25 * clf_score)

        final_agent_scores[name] = {
            "symbolic": round(symbolic_score, 3),
            "logical": round(logical_score, 3),
            "classifier": round(clf_score, 3),
            "weighted_score": round(weighted_score, 3),
            "final_answer": data.get("final_answer", "ERROR"),
        }

    # 3. Overall System Decision
    # max() keeps the first agent in insertion order when scores tie.
    best_name, best_entry = max(
        final_agent_scores.items(), key=lambda item: item[1]["weighted_score"]
    )
    best_score = best_entry["weighted_score"]

    if best_score > valid_threshold:
        final_verdict = "VALID"
        overall_confidence = min(0.99, best_score * 1.1)
    else:
        final_verdict = "ERROR"
        overall_confidence = max(0.1, best_score)

    return {
        "final_verdict": final_verdict,
        "overall_confidence": round(overall_confidence, 3),
        "chosen_answer": best_entry["final_answer"],
        "chosen_agent": best_name,
        "hallucination_alerts": hallucination_alerts,
        "divergence_scores": {k: round(v, 3) for k, v in divergence_scores.items()},
        "agent_scoring_breakdown": final_agent_scores,
        "all_errors": [],  # Kept for API compatibility with legacy Dashboard
        "individual_verdicts": {
            k: "VALID" if v["weighted_score"] > valid_threshold else "ERROR"
            for k, v in final_agent_scores.items()
        },
    }
|