# Author: Varshith Dharmaj
# Commit: Robust MVM2 System Sync: Fixed Imports and Restored Services
# Revision: b25b8f2 (verified)
import math
from typing import List, Dict, Any
from services.core_engine.verification_module import calculate_symbolic_score
def normalize_answers(answers: List[str]) -> Dict[str, List[int]]:
    """
    Group answers that are identical after a light textual normalization.

    Normalization removes space characters and backslashes and lowercases the
    text, so ``"x = 1"`` and ``"X=1"`` fall into the same group. This is a
    deliberately simple fallback (kept for Windows to avoid WinError 6 from
    math_verify multiprocessing); it performs no symbolic comparison.

    Args:
        answers: Final answers, one per agent. ``None`` is treated as an
            empty answer and any other non-string value is converted with
            ``str()`` instead of raising ``AttributeError``.

    Returns:
        A dict mapping the first-seen answer text of each group to the list
        of positions in ``answers`` whose normalized form matches it. Groups
        appear in first-appearance order.
    """
    # Deletes spaces and backslashes in a single pass.
    strip_table = str.maketrans("", "", " \\")

    normalized_groups: Dict[str, List[int]] = {}
    # normalized form -> representative (first-seen) answer text; replaces the
    # original rescan of every existing group key for every answer.
    representative_for: Dict[str, str] = {}

    for idx, ans in enumerate(answers):
        text = "" if ans is None else str(ans)
        clean_ans = text.translate(strip_table).lower()
        representative = representative_for.setdefault(clean_ans, text)
        normalized_groups.setdefault(representative, []).append(idx)

    return normalized_groups
def evaluate_consensus(agent_responses: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Score every agent and select the answer group with the highest weighted score.

    Implements the Adaptive Consensus scoring formula cited from the MVM2 paper:
        Score_j = 0.40 * V^{sym}_j + 0.35 * L^{logic}_j + 0.25 * C^{clf}_j

    Args:
        agent_responses: One dict per agent with the keys ``"agent"`` and
            ``"response"``. ``"response"`` is a dict that may contain
            ``"Answer"``, ``"Reasoning Trace"`` and ``"Confidence Explanation"``.
            A field that is missing or explicitly ``None`` is treated as empty.

    Returns:
        A dict with:
            ``final_verified_answer``: representative answer of the winning
                group, or ``"Error: Unresolvable Divergence"`` when there are
                no groups (e.g. empty input).
            ``winning_score``: weighted score of the winning group (not
                rounded), or ``-1.0`` when there are no groups.
            ``detail_scores``: per-agent score breakdown, in input order.
            ``divergence_groups``: per-group supporters and rounded score.

    Raises:
        KeyError: If an entry lacks the ``"agent"`` or ``"response"`` key.
    """
    # 1. Normalize final answers across agents.
    # dict.get's default only covers a *missing* key, so an explicit None (or a
    # numeric answer) is coerced to text here before normalization.
    answers = []
    for agent_data in agent_responses:
        raw_answer = agent_data["response"].get("Answer")
        answers.append("" if raw_answer is None else str(raw_answer))
    answer_groups = normalize_answers(answers)

    # 2. Evaluate individual agent execution paths.
    scores = []
    for agent_data in agent_responses:
        res = agent_data["response"]

        trace = res.get("Reasoning Trace")
        if trace is None:
            trace = []

        # V^{sym}_j : symbolic validation score (weight 0.40)
        v_sym = calculate_symbolic_score(trace)

        # L^{logic}_j : placeholder based on trace length, saturating at
        # five steps (weight 0.35)
        l_logic = min(len(trace) / 5.0, 1.0)

        # C^{clf}_j : placeholder keyword check on the confidence explanation
        # (weight 0.25)
        conf_exp = res.get("Confidence Explanation")
        conf_text = "" if conf_exp is None else str(conf_exp).lower()
        if "guaranteed" in conf_text or "proof" in conf_text:
            c_clf = 1.0
        else:
            c_clf = 0.8

        # Core Neuro-Symbolic Scoring Formula
        score_j = (0.40 * v_sym) + (0.35 * l_logic) + (0.25 * c_clf)

        scores.append({
            "agent": agent_data["agent"],
            "raw_answer": res.get("Answer"),
            "V_sym": v_sym,
            "L_logic": round(l_logic, 2),
            "C_clf": round(c_clf, 2),
            "Score_j": round(score_j, 3),
        })

    # 3. Aggregate consensus by matching normalized answer groups.
    final_consensus = {}
    top_score = -1.0
    best_answer = "Error: Unresolvable Divergence"

    for rep_ans, indices in answer_groups.items():
        # Sums the already-rounded per-agent scores.
        group_score = sum(scores[i]["Score_j"] for i in indices)

        # Consistency multiplier: +10% for every additional agent that
        # reached the same normalized answer.
        consistency_multiplier = 1.0 + (0.1 * (len(indices) - 1))
        weighted_group_score = group_score * consistency_multiplier

        # Strict comparison: on a tie the earliest group wins.
        if weighted_group_score > top_score:
            top_score = weighted_group_score
            best_answer = rep_ans

        final_consensus[rep_ans] = {
            "agent_indices": indices,
            "agents_supporting": [scores[i]["agent"] for i in indices],
            "aggregate_score": round(weighted_group_score, 3),
        }

    return {
        "final_verified_answer": best_answer,
        "winning_score": top_score,
        "detail_scores": scores,
        "divergence_groups": final_consensus,
    }