Varshith dharmaj committed on
Commit
bdc964e
·
verified ·
1 Parent(s): d3d6b2b

Upload consensus/consensus_mechanism.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. consensus/consensus_mechanism.py +127 -0
consensus/consensus_mechanism.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sympy
2
+ from sympy.parsing.sympy_parser import parse_expr, standard_transformations, implicit_multiplication_application, convert_equals_signs
3
+ from typing import Dict, Any, List
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
def evaluate_sympy_algebra(steps: List[str]) -> float:
    """Symbolically check step-by-step transformations using SymPy.

    Every step containing an ``=`` sign is treated as a claimed equality.
    Both sides are parsed and the equality is accepted when
    ``simplify(lhs - rhs)`` reduces to zero.

    Args:
        steps: Reasoning-trace strings (natural language mixed with algebra).

    Returns:
        Score from 0.0 to 1.0: the fraction of equality statements that
        hold; 0.0 for an empty trace; a neutral 0.5 when the trace has no
        ``=`` statements at all.
    """
    if not steps:
        return 0.0

    transformations = standard_transformations + (
        implicit_multiplication_application,
        convert_equals_signs,
    )
    valid_count = 0
    math_statements = 0

    for step in steps:
        if "=" not in step:
            continue
        math_statements += 1

        # Strip a leading "let"/"Let" keyword as a whole word so that
        # "Let x = 5" parses as "x = 5".  (The previous blind substring
        # replace of "let" corrupted words such as "complete"/"deleted"
        # and turned "Let x = 5" into the unparseable "= 5".)
        clean_step = step.strip()
        if clean_step.lower().startswith(("let ", "let\t")):
            clean_step = clean_step[4:].strip()

        left, _, right = clean_step.partition("=")
        if not left.strip() or not right.strip():
            # One side is empty: nothing to compare; counts as unverified,
            # matching the old behavior where the parse attempt failed.
            continue

        try:
            lhs = parse_expr(left, transformations=transformations)
            rhs = parse_expr(right, transformations=transformations)
            # simplify(lhs - rhs) evaluating to 0 proves the equality.
            if sympy.simplify(lhs - rhs) == 0:
                valid_count += 1
        except Exception:
            # Parse/simplify failure: the statement stays unverified.
            pass

    if math_statements == 0:
        return 0.5  # Neutral if no explicit math equations found

    return valid_count / math_statements
41
+
42
def compute_neurosymbolic_consensus(agent_results: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """Implement the MVM² Hybrid Verification System.

    Combines three signals into a weighted score per agent:
        40% symbolic check validity (SymPy equality verification)
        35% logical consistency (simulated via agent confidence trace)
        25% classifier signal (divergence-matrix step alignment)

    Args:
        agent_results: Maps agent name to a dict expected to carry
            ``reasoning_trace`` (list of step strings),
            ``confidence_explanation`` (free text) and ``final_answer``.

    Returns:
        Verdict dict with keys ``final_verdict``, ``overall_confidence``,
        ``chosen_answer``, ``chosen_agent``, ``hallucination_alerts``,
        ``divergence_scores``, ``agent_scoring_breakdown``, ``all_errors``
        and ``individual_verdicts``.
    """
    # Guard: without this, an empty input raised ValueError at the max()
    # call below ("max() arg is an empty sequence").
    if not agent_results:
        return {
            "final_verdict": "ERROR",
            "overall_confidence": 0.0,
            "chosen_answer": "ERROR",
            "chosen_agent": None,
            "hallucination_alerts": [],
            "divergence_scores": {},
            "agent_scoring_breakdown": {},
            "all_errors": ["No agent results provided"],
            "individual_verdicts": {},
        }

    # 1. Divergence matrix (step-level alignment).
    # Simple proxy for divergence: deviation of each agent's reasoning-trace
    # length from the group average.
    divergence_scores: Dict[str, float] = {}
    hallucination_alerts: List[str] = []

    trace_lengths = {
        name: len(data.get("reasoning_trace", []))
        for name, data in agent_results.items()
    }
    avg_length = sum(trace_lengths.values()) / len(trace_lengths)

    for name, length in trace_lengths.items():
        if avg_length == 0:
            divergence = 0.5  # Nobody produced steps; neutral alignment.
        else:
            deviation = min(1.0, abs(length - avg_length) / avg_length)
            divergence = 1.0 - deviation  # 1.0 = perfect alignment
        divergence_scores[name] = divergence

        # Hallucination alert threshold (<0.7).
        if divergence < 0.7:
            hallucination_alerts.append(
                f"Alert: {name} fell below 0.7 step agreement "
                f"(Score: {divergence:.2f}). Possible hallucination detected."
            )

    # 2. Extract answers and score individual agents.
    final_agent_scores: Dict[str, Dict[str, Any]] = {}

    for name, data in agent_results.items():
        steps = data.get("reasoning_trace", [])

        # A. Symbolic check (40%).
        symbolic_score = evaluate_sympy_algebra(steps)

        # B. Logical consistency (35%): map the agent's self-reported
        # confidence text onto a coarse score.
        conf_text = str(data.get("confidence_explanation", "")).lower()
        if "hallucination" in conf_text or "error" in conf_text or "guess" in conf_text:
            logical_score = 0.3
        else:
            logical_score = 0.95

        # C. Classifier signal (25%): divergence-matrix alignment score.
        clf_score = divergence_scores.get(name, 0.5)

        # Domain-informed weighted scoring.
        weighted_score = (0.40 * symbolic_score) + (0.35 * logical_score) + (0.25 * clf_score)

        final_agent_scores[name] = {
            "symbolic": round(symbolic_score, 3),
            "logical": round(logical_score, 3),
            "classifier": round(clf_score, 3),
            "weighted_score": round(weighted_score, 3),
            "final_answer": data.get("final_answer", "ERROR"),
        }

    # 3. Overall system decision: the best-weighted agent wins.
    best_name, best_data = max(
        final_agent_scores.items(), key=lambda item: item[1]["weighted_score"]
    )
    best_score = best_data["weighted_score"]

    if best_score > 0.65:
        final_verdict = "VALID"
        overall_confidence = min(0.99, best_score * 1.1)
    else:
        final_verdict = "ERROR"
        overall_confidence = max(0.1, best_score)

    return {
        "final_verdict": final_verdict,
        "overall_confidence": round(overall_confidence, 3),
        "chosen_answer": best_data["final_answer"],
        "chosen_agent": best_name,
        "hallucination_alerts": hallucination_alerts,
        "divergence_scores": {k: round(v, 3) for k, v in divergence_scores.items()},
        "agent_scoring_breakdown": final_agent_scores,
        "all_errors": [],  # Kept for API compatibility with legacy Dashboard
        "individual_verdicts": {
            k: "VALID" if v["weighted_score"] > 0.65 else "ERROR"
            for k, v in final_agent_scores.items()
        },
    }