Varshith dharmaj committed on
Commit
cd17d1f
·
verified ·
1 Parent(s): 1d7be9f

Upload services/core_engine/consensus_module.py with huggingface_hub

Browse files
services/core_engine/consensus_module.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import List, Dict, Any
3
+ from verification_module import calculate_symbolic_score
4
+
5
def normalize_answers(answers: List[str]) -> Dict[str, List[int]]:
    """
    Group answers that are textually identical after a light normalization.

    Fallback implementation for Windows to avoid WinError 6 from
    math_verify multiprocessing. Normalization is purely textual: spaces
    and backslashes are removed and the text is lowercased. No symbolic
    equivalence is attempted, so e.g. "1/2" and "0.5" land in different
    groups.

    Args:
        answers: Final answers, one per agent, in agent order. Non-string
            entries are tolerated: ``None`` is treated as the empty string
            and anything else is converted with ``str()``.

    Returns:
        A dict mapping a representative answer (the first raw answer seen
        for each normalized form) to the list of indices in ``answers``
        that normalize to the same text. Keys appear in order of first
        occurrence.
    """
    normalized_groups: Dict[str, List[int]] = {}
    # Normalized text -> representative raw answer used as the group key.
    # This replaces re-normalizing and scanning every existing key for each
    # answer (quadratic) with a single dict lookup per answer.
    representative_for: Dict[str, str] = {}

    for idx, ans in enumerate(answers):
        # Model output is not guaranteed to be a string (missing / numeric
        # answers happen), so coerce before calling str methods.
        raw = "" if ans is None else str(ans)
        clean = raw.replace(" ", "").replace("\\", "").lower()

        rep = representative_for.setdefault(clean, raw)
        normalized_groups.setdefault(rep, []).append(idx)

    return normalized_groups
30
+
31
# Weights of the adaptive consensus formula:
#   Score_j = 0.40 * V^{sym}_j + 0.35 * L^{logic}_j + 0.25 * C^{clf}_j
_W_SYM = 0.40
_W_LOGIC = 0.35
_W_CLF = 0.25

# Extra weight granted per additional agent agreeing on the same answer.
_CONSISTENCY_BONUS = 0.1


def _score_agent(agent_data: Dict[str, Any]) -> Dict[str, Any]:
    """Compute the component scores and combined Score_j for one agent."""
    res = agent_data["response"]

    # A key may be present with a null value in parsed model output;
    # fall back to the same defaults used when the key is missing.
    trace = res.get("Reasoning Trace")
    if trace is None:
        trace = []
    conf_exp = res.get("Confidence Explanation")
    if conf_exp is None:
        conf_exp = ""

    # V^{sym}_j : symbolic validation of the reasoning trace (weight 0.40)
    v_sym = calculate_symbolic_score(trace)

    # L^{logic}_j : placeholder - trace length, saturating at 5 steps
    # (weight 0.35)
    l_logic = min(len(trace) / 5.0, 1.0)

    # C^{clf}_j : placeholder - keyword check on the confidence explanation
    # (weight 0.25). str() guards against non-string explanations.
    conf_text = str(conf_exp).lower()
    c_clf = 1.0 if ("guaranteed" in conf_text or "proof" in conf_text) else 0.8

    score_j = (_W_SYM * v_sym) + (_W_LOGIC * l_logic) + (_W_CLF * c_clf)

    return {
        "agent": agent_data["agent"],
        "raw_answer": res.get("Answer"),
        "V_sym": v_sym,
        "L_logic": round(l_logic, 2),
        "C_clf": round(c_clf, 2),
        "Score_j": round(score_j, 3),
    }


def evaluate_consensus(agent_responses: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Calculates the final Adaptive Consensus scoring algorithm from the MVM2 paper:
    Score_j = 0.40 * V^{sym}_j + 0.35 * L^{logic}_j + 0.25 * C^{clf}_j

    Each agent is scored individually, agents are grouped by normalized
    final answer, and the group with the highest boosted aggregate score
    wins.

    Args:
        agent_responses: One dict per agent with an "agent" entry and a
            "response" dict. The response is read for "Answer",
            "Reasoning Trace" and "Confidence Explanation"; missing or
            null values fall back to "", [] and "" respectively.

    Returns:
        A dict with:
          - "final_verified_answer": representative answer of the winning
            group, or "Error: Unresolvable Divergence" when there are no
            groups (e.g. empty input).
          - "winning_score": boosted aggregate score of the winning group
            (-1.0 when there are no groups).
          - "detail_scores": per-agent score breakdown, in input order.
          - "divergence_groups": per-answer-group supporters and score.

    Raises:
        KeyError: if an entry lacks the "agent" or "response" key.
    """
    # 1. Normalize final answers across agents. Answers are coerced to text
    #    so that null / numeric answers do not break the string-based
    #    normalization.
    answers = []
    for entry in agent_responses:
        answer = entry["response"].get("Answer", "")
        answers.append("" if answer is None else str(answer))
    answer_groups = normalize_answers(answers)

    # 2. Evaluate individual agent execution paths
    scores = [_score_agent(agent_data) for agent_data in agent_responses]

    # 3. Aggregate consensus by matching normalized answer groups
    final_consensus = {}
    top_score = -1.0
    best_answer = "Error: Unresolvable Divergence"

    for rep_ans, indices in answer_groups.items():
        group_score = sum(scores[i]["Score_j"] for i in indices)

        # Boost groups on which several agents independently agree:
        # +10% of the summed score per additional supporting agent.
        consistency_multiplier = 1.0 + (_CONSISTENCY_BONUS * (len(indices) - 1))
        weighted_group_score = group_score * consistency_multiplier

        # Strict '>' keeps the earliest group on ties.
        if weighted_group_score > top_score:
            top_score = weighted_group_score
            best_answer = rep_ans

        final_consensus[rep_ans] = {
            "agent_indices": indices,
            "agents_supporting": [scores[i]["agent"] for i in indices],
            "aggregate_score": round(weighted_group_score, 3),
        }

    return {
        "final_verified_answer": best_answer,
        "winning_score": top_score,
        "detail_scores": scores,
        "divergence_groups": final_consensus,
    }