| import math
|
| from typing import List, Dict, Any
|
| from services.core_engine.verification_module import calculate_symbolic_score
|
|
|
def normalize_answers(answers: List[str]) -> Dict[str, List[int]]:
    """Group textually-divergent answers whose normalized forms match.

    Normalization strips spaces and backslashes and lowercases, so e.g.
    "\\frac{1}{2}" and "frac{1}{2}" land in the same group.  Each group is
    keyed by the first answer seen for that normalized form (original,
    un-normalized text) and maps to the list of indices into ``answers``
    of all members, in input order.

    Fallback implementation for Windows to avoid WinError 6 from
    math_verify multiprocessing.

    Args:
        answers: Raw answer strings, one per agent.

    Returns:
        Mapping of representative answer -> sorted member indices.
    """
    normalized_groups: Dict[str, List[int]] = {}
    # Normalized form -> representative (first-seen) answer.  Lets each
    # incoming answer match its group in O(1) instead of rescanning and
    # re-normalizing every existing group key (previously O(n^2)).
    rep_by_clean: Dict[str, str] = {}

    for idx, ans in enumerate(answers):
        clean_ans = ans.replace(" ", "").replace("\\", "").lower()
        rep = rep_by_clean.get(clean_ans)
        if rep is None:
            # First time we see this normalized form: the original text
            # becomes the group's representative key.
            rep_by_clean[clean_ans] = ans
            normalized_groups[ans] = [idx]
        else:
            normalized_groups[rep].append(idx)

    return normalized_groups
|
|
|
def evaluate_consensus(agent_responses: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Apply the Adaptive Consensus scoring algorithm from the MVM2 paper:

        Score_j = 0.40 * V^{sym}_j + 0.35 * L^{logic}_j + 0.25 * C^{clf}_j

    Each agent gets a composite score; agents whose answers normalize to
    the same form (per ``normalize_answers``) are pooled, the pooled sum
    earns a +10% consistency bonus per additional supporting agent, and
    the highest weighted group supplies the final answer.
    """
    raw_answers = [entry["response"].get("Answer", "") for entry in agent_responses]
    answer_groups = normalize_answers(raw_answers)

    scores: List[Dict[str, Any]] = []
    for entry in agent_responses:
        payload = entry["response"]
        steps = payload.get("Reasoning Trace", [])

        # Symbolic verification component, delegated to the core engine.
        v_sym = calculate_symbolic_score(steps)
        # Logic-depth component: saturates at 1.0 once the trace has >= 5 steps.
        l_logic = min(len(steps) / 5.0, 1.0)
        # Classifier proxy: strong-certainty wording earns full credit.
        explanation = payload.get("Confidence Explanation", "").lower()
        c_clf = 1.0 if ("guaranteed" in explanation or "proof" in explanation) else 0.8

        composite = 0.40 * v_sym + 0.35 * l_logic + 0.25 * c_clf
        scores.append({
            "agent": entry["agent"],
            "raw_answer": payload.get("Answer"),
            "V_sym": v_sym,
            "L_logic": round(l_logic, 2),
            "C_clf": round(c_clf, 2),
            "Score_j": round(composite, 3),
        })

    best_answer = "Error: Unresolvable Divergence"
    top_score = -1.0
    final_consensus: Dict[str, Any] = {}

    for rep_ans, member_indices in answer_groups.items():
        pooled = sum(scores[i]["Score_j"] for i in member_indices)
        # Consistency bonus: +10% for every supporter beyond the first.
        weighted = pooled * (1.0 + 0.1 * (len(member_indices) - 1))

        if weighted > top_score:
            top_score, best_answer = weighted, rep_ans

        final_consensus[rep_ans] = {
            "agent_indices": member_indices,
            "agents_supporting": [scores[i]["agent"] for i in member_indices],
            "aggregate_score": round(weighted, 3),
        }

    return {
        "final_verified_answer": best_answer,
        "winning_score": top_score,
        "detail_scores": scores,
        "divergence_groups": final_consensus,
    }
|
|
|