# vriddhi saini
# Initial MLOps multi-agent system
# 6e9d8ea
# ══════════════════════════════════════════════════════════════════════════
# MULTI AGENT SYSTEM — Research Verification Platform
# Built on: Mistral API (mistral-small-latest) + FAISS RAG + xAI Layer
#
# Changes from local version:
# - Local Mistral-7B replaced with Mistral API (~2-5s per agent vs ~8 mins)
# - No GPU/VRAM required — runs on CPU only
# - No bitsandbytes/accelerate/torch needed for inference
# - torch only used for embedding model (CPU)
# - Each agent call = one API request to mistral-small-latest
# ══════════════════════════════════════════════════════════════════════════
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
import os
from dotenv import load_dotenv
load_dotenv() # loads MISTRAL_API_KEY from .env
import os, re, json, datetime
import numpy as np
import pandas as pd
import faiss
import gradio as gr
import plotly.graph_objects as go
#from mistralai import Mistral
try:
from mistralai import Mistral
except ImportError:
from mistralai.client import Mistral
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import mlflow
from monitoring import monitor_request
from scripts.data_versioning import get_rag_stores
# ══════════════════════════════════════════════════════════════════════════
# SECTION 1 — MISTRAL API CLIENT + EMBEDDING MODEL
# ══════════════════════════════════════════════════════════════════════════
# Re-assert the tokenizer setting right before the embedding model is created
# (the same value is already exported at the top of the file).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
print("Loading embedding model...")
# Sentence-embedding model, pinned to CPU.
embed_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
# API client; falls back to an empty key when MISTRAL_API_KEY is not set.
mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY", ""))
# Model name sent with every chat completion request.
MISTRAL_MODEL = "mistral-small-latest"
print("Ready.")
def encode_single(text: str) -> np.ndarray:
    """Embed one string and return the first row of the encoder output.

    The text is wrapped in a one-element list and encoded with
    ``convert_to_numpy=True`` and ``normalize_embeddings=True``.
    """
    batch = embed_model.encode(
        [text],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return batch[0]
def encode_query(text: str) -> np.ndarray:
    """Embed one string and return the encoder output for the one-item batch.

    Unlike ``encode_single`` the batch is returned as-is (not indexed), which
    is the form handed to the FAISS ``search`` call in ``retrieve_for_agent``.
    """
    single_item_batch = [text]
    encoded = embed_model.encode(
        single_item_batch,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    return encoded
# ══════════════════════════════════════════════════════════════════════════
# SECTION 2 — RAG STORES
# ══════════════════════════════════════════════════════════════════════════
# Maximum number of CMV rows kept for each of the Proposer and Opposer stores.
CMV_PER_AGENT = 500
# Number of Wikipedia chunks kept in total (split between Critic and Synthesizer).
WIKI_TOTAL = 1000
# Number of whitespace-separated words per Wikipedia chunk.
WIKI_CHUNK_SZ = 150
def build_faiss_index(embeddings: np.ndarray) -> faiss.IndexFlatIP:
    """Create a flat inner-product FAISS index and load *embeddings* into it.

    The index dimensionality is read from ``embeddings.shape[1]``.
    """
    dimension = embeddings.shape[1]
    ip_index = faiss.IndexFlatIP(dimension)
    ip_index.add(embeddings)
    return ip_index
def build_rag_stores() -> dict:
    """Build the per-agent retrieval stores from the CMV and Wikipedia datasets.

    Returns:
        A dict keyed by agent name ("Proposer", "Opposer", "Critic",
        "Synthesizer"). Each value holds ``texts`` (list of str), a
        human-readable ``source`` label and a FAISS ``index`` built over the
        normalized embeddings of those texts.
    """
    stores = {}

    # ── CMV: Proposer / Opposer ───────────────────────────────────────────
    print("\nLoading CMV dataset...")
    cmv = load_dataset("Siddish/change-my-view-subreddit-cleaned")
    df_cmv = pd.DataFrame(cmv["train"])
    text_col = "text" if "text" in df_cmv.columns else df_cmv.columns[0]
    # Keep the "delta" column (when the dataset has one) next to the text.
    # Selecting only the text column made the delta-based split below
    # unreachable, so the positional half/half fallback always ran.
    keep_cols = [text_col]
    if "delta" in df_cmv.columns and text_col != "delta":
        keep_cols.append("delta")
    df_cmv = df_cmv[keep_cols].rename(columns={text_col: "text"})
    df_cmv = df_cmv[df_cmv["text"].str.len() > 50].reset_index(drop=True)
    df_cmv["text"] = df_cmv["text"].str[:300]

    if "delta" in df_cmv.columns:
        # Element-wise comparisons on a Series; `is True` would not work here.
        df_prop = df_cmv[df_cmv["delta"] == True].head(CMV_PER_AGENT).reset_index(drop=True)  # noqa: E712
        df_opp = df_cmv[df_cmv["delta"] == False].head(CMV_PER_AGENT).reset_index(drop=True)  # noqa: E712
    else:
        # No label available: first half feeds the Proposer, second half the Opposer.
        mid = len(df_cmv) // 2
        df_prop = df_cmv.iloc[:mid].head(CMV_PER_AGENT).reset_index(drop=True)
        df_opp = df_cmv.iloc[mid:].head(CMV_PER_AGENT).reset_index(drop=True)
    print(f" Proposer: {len(df_prop)} | Opposer: {len(df_opp)}")

    prop_texts = df_prop["text"].tolist()
    opp_texts = df_opp["text"].tolist()
    # Encode both halves in one call, then slice the matrix back apart.
    all_embs = embed_model.encode(prop_texts + opp_texts, convert_to_numpy=True,
                                  batch_size=1, show_progress_bar=False,
                                  normalize_embeddings=True)
    stores["Proposer"] = {"texts": prop_texts, "source": "CMV (pro-arguments)",
                          "index": build_faiss_index(all_embs[:len(prop_texts)])}
    stores["Opposer"] = {"texts": opp_texts, "source": "CMV (counter-arguments)",
                         "index": build_faiss_index(all_embs[len(prop_texts):])}

    # ── Wikipedia: Critic / Synthesizer ───────────────────────────────────
    print("\nStreaming Wikipedia (~1000 chunks)...")
    wiki = load_dataset("wikimedia/wikipedia", "20231101.en", split="train", streaming=True)
    chunks = []
    for row in wiki:
        words = row["text"].split()
        for start in range(0, len(words), WIKI_CHUNK_SZ):
            piece = words[start:start + WIKI_CHUNK_SZ]
            # Drop short tail fragments; a slice never exceeds WIKI_CHUNK_SZ words.
            if len(piece) >= 60:
                chunks.append(" ".join(piece))
        # Checked once per article so the stream stops as soon as enough
        # chunks exist; any overshoot is trimmed below.
        if len(chunks) >= WIKI_TOTAL:
            break
    chunks = chunks[:WIKI_TOTAL]
    mid_wiki = len(chunks) // 2
    wiki_embs = embed_model.encode(chunks, convert_to_numpy=True,
                                   batch_size=1, show_progress_bar=False,
                                   normalize_embeddings=True)
    stores["Critic"] = {"texts": chunks[:mid_wiki], "source": "Wikipedia (factual)",
                        "index": build_faiss_index(wiki_embs[:mid_wiki])}
    stores["Synthesizer"] = {"texts": chunks[mid_wiki:], "source": "Wikipedia (balanced)",
                             "index": build_faiss_index(wiki_embs[mid_wiki:])}
    print("\nKnowledge stores ready.")
    return stores
print("\nBuilding knowledge stores (~1-2 mins)...")
#RAG_STORES = build_rag_stores()
# The builder is handed to get_rag_stores (from scripts.data_versioning)
# instead of being called directly as in the commented-out line above.
RAG_STORES = get_rag_stores(build_rag_stores)
# Per-agent record of the most recent retrieval; written by retrieve_for_agent.
_last_sources = {}
def retrieve_for_agent(agent_name: str, query_emb: np.ndarray, k: int = 3) -> list:
    """Return up to *k* texts from the agent's store that best match *query_emb*.

    Args:
        agent_name: Key into ``RAG_STORES``.
        query_emb: Query matrix passed straight to the FAISS ``search`` call
            (the pipeline supplies the output of ``encode_query``).
        k: Number of neighbours requested from the index.

    Returns:
        The matched texts; fewer than *k* when the index has fewer vectors.

    Side effects:
        Overwrites ``_last_sources[agent_name]`` with the store's source label
        and the first 120 characters of every returned text.
    """
    store = RAG_STORES[agent_name]
    texts = store["texts"]
    _, neighbor_ids = store["index"].search(query_emb, k)
    # FAISS pads missing neighbours with -1 when the index holds fewer than k
    # vectors. An upper-bound-only check lets -1 through, which would silently
    # return texts[-1] (or raise IndexError on an empty store).
    docs = [texts[i] for i in neighbor_ids[0] if 0 <= i < len(texts)]
    _last_sources[agent_name] = {
        "source_label": store["source"],
        "snippets": [d[:120] for d in docs],
    }
    return docs
# ══════════════════════════════════════════════════════════════════════════
# SECTION 3 — AGENT DEFINITIONS
# ══════════════════════════════════════════════════════════════════════════
# Output-format rules appended to every agent's system prompt (see AGENT_CONFIG).
_FMT_RULES = """\
STRICT OUTPUT RULES:
1. Output ONLY the labeled fields below, nothing else.
2. Each field starts at the beginning of a new line.
3. CLAIM must be exactly ONE sentence.
4. Each ARGUMENT must be ONE sentence on its own line.
5. Do NOT add prose, explanations, or extra fields."""
# Worked example shown to the Proposer; the labels are the ones parse_response looks for.
_PROPOSER_EX = """\
CLAIM: Renewable energy is the most viable path to carbon neutrality.
ARGUMENT 1: Solar costs have fallen 90% in the last decade making it cost-competitive.
ARGUMENT 2: Wind and solar combined can meet baseload demand with sufficient storage.
ARGUMENT 3: Renewable investment creates more jobs per dollar than fossil fuel investment.
CONFIDENCE: 8
INFLUENCED_BY: none
POSITION_SHIFT: none"""
# Worked example shown to the Opposer.
_OPPOSER_EX = """\
CLAIM: Renewable energy alone cannot reliably replace fossil fuels today.
ARGUMENT 1: Grid-scale storage remains prohibitively expensive for 100% renewable grids.
ARGUMENT 2: Manufacturing solar panels requires significant upfront carbon emissions.
ARGUMENT 3: Energy demand in developing nations is growing faster than renewable capacity.
CONFIDENCE: 7
INFLUENCED_BY: Proposer Argument 1
POSITION_SHIFT: none"""
# Worked example shown to the Critic.
_CRITIC_EX = """\
CLAIM: Both positions contain valid points but overstate certainty in key areas.
ARGUMENT 1: Proposer's cost claims are accurate but ignore storage and grid upgrade costs.
ARGUMENT 2: Opposer's manufacturing emissions argument ignores lifecycle carbon accounting.
ARGUMENT 3: Neither position addresses policy and governance barriers adequately.
CONFIDENCE: 8
INFLUENCED_BY: Opposer
POSITION_SHIFT: slight"""
# Worked example shown to the Synthesizer; the only example with a TRUST_SCORE field.
_SYNTH_EX = """\
CLAIM: A balanced renewable transition is achievable but requires addressing storage and policy gaps.
ARGUMENT 1: Cost trends strongly support renewables as the long-term primary energy source.
ARGUMENT 2: Transition requires parallel investment in storage, grid infrastructure, and policy.
ARGUMENT 3: The Critic's lifecycle analysis provides the most accurate framing of trade-offs.
CONFIDENCE: 8
TRUST_SCORE: 74
INFLUENCED_BY: Critic
POSITION_SHIFT: moderate"""
# Per-agent settings, keyed by agent name.
#   temperature - sampling temperature sent to the API by generate_response
#   color       - hex colour used for the agent's card in build_results_html
#   display     - name printed in the run_pipeline progress log
#   role_desc   - short role description shown on the agent's card
#   system      - system prompt: role instructions + format rules + worked example
AGENT_CONFIG = {
    "Proposer": {
        "temperature": 0.8, "color": "#22c55e",
        "display": "Proposer", "role_desc": "Defends the claim with evidence",
        "system": (
            "You are the Proposer in a research verification system. "
            "Present the strongest evidence-based case IN SUPPORT of the claim.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_PROPOSER_EX}"
        ),
    },
    "Opposer": {
        "temperature": 0.8, "color": "#ef4444",
        "display": "Opposer", "role_desc": "Challenges with counter-evidence",
        "system": (
            "You are the Opposer in a research verification system. "
            "Present the strongest evidence-based case AGAINST the claim.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_OPPOSER_EX}"
        ),
    },
    "Critic": {
        "temperature": 0.3, "color": "#3b82f6",
        "display": "Critic", "role_desc": "Audits logical consistency of both sides",
        "system": (
            "You are the Critic in a research verification system. "
            "Objectively evaluate logical consistency and evidential quality of BOTH "
            "the Proposer and Opposer. Identify overstatements and logical fallacies.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_CRITIC_EX}"
        ),
    },
    "Synthesizer": {
        "temperature": 0.4, "color": "#f59e0b",
        "display": "Synthesizer", "role_desc": "Produces a calibrated final verdict",
        "system": (
            "You are the Synthesizer in a research verification system. "
            "Produce a calibrated, balanced verdict. "
            "TRUST_SCORE (0-100) = how well-supported the original claim is. "
            "0=completely unsupported, 50=contested, 100=strongly supported.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_SYNTH_EX}"
        ),
    },
}
# For each agent, the earlier agents whose raw output build_prompt adds to its prompt.
CONTEXT_MAP = {
    "Proposer": [],
    "Opposer": ["Proposer"],
    "Critic": ["Proposer", "Opposer"],
    "Synthesizer": ["Proposer", "Opposer", "Critic"],
}
# Order in which run_pipeline calls the agents; also the row/column order of the charts.
AGENT_ORDER = ["Proposer", "Opposer", "Critic", "Synthesizer"]
# Score dimensions, in the same order as the keys returned by score_agent.
SCORE_DIMS = ["Logic", "Evidence", "Clarity", "Rigor", "Nuance"]
# Agent name -> ("r,g,b" string used inside rgba(...), hex colour) for the charts.
COLOR_MAP = {
    "Proposer": ("34,197,94", "#22c55e"),
    "Opposer": ("239,68,68", "#ef4444"),
    "Critic": ("59,130,246", "#3b82f6"),
    "Synthesizer": ("245,158,11", "#f59e0b"),
}
# ══════════════════════════════════════════════════════════════════════════
# SECTION 4 — MISTRAL API CALL
# ══════════════════════════════════════════════════════════════════════════
def generate_response(agent_name: str, system_prompt: str, user_prompt: str) -> str:
    """Send one chat completion request for *agent_name* and return its text.

    Args:
        agent_name: Key into ``AGENT_CONFIG``; selects the sampling temperature.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        KeyError: If *agent_name* is not configured.
        RuntimeError: For any failure of the API call or an unusable reply;
            the original exception is attached as ``__cause__``.
    """
    temperature = AGENT_CONFIG[agent_name]["temperature"]
    try:
        response = mistral_client.chat.complete(
            model=MISTRAL_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=temperature,
            max_tokens=400,
        )
        content = response.choices[0].message.content
        # Anything other than plain text cannot be parsed downstream; fail
        # with a readable message instead of an AttributeError on .strip().
        if not isinstance(content, str):
            raise ValueError("response contained no text content")
        return content.strip()
    except Exception as e:
        detail = str(e)
        lowered = detail.lower()
        if "api_key" in lowered or "authentication" in lowered or "401" in detail:
            # The key is read from the environment (populated by load_dotenv),
            # so point the user there rather than at a notebook cell.
            raise RuntimeError(
                "Mistral API key missing or invalid. "
                "Set MISTRAL_API_KEY in the environment or the .env file and restart."
            ) from e
        raise RuntimeError(f"API error for {agent_name}: {detail}") from e
# ══════════════════════════════════════════════════════════════════════════
# SECTION 5 — PROMPT BUILDER
# ══════════════════════════════════════════════════════════════════════════
def build_prompt(agent_name: str, topic: str, history: dict, mode: str,
                 query_emb: np.ndarray, external_output: str = None) -> tuple:
    """Assemble the (system, user) prompt pair for one agent.

    The user prompt is made of three parts: a seed (the topic, or in "verify"
    mode the first 500 characters of the claim plus the topic), the first 500
    characters of each earlier agent's output listed in ``CONTEXT_MAP``, and
    the first 120 characters of each of the 3 documents retrieved for the agent.

    Returns:
        ``(system_prompt, user_prompt)``.
    """
    system = AGENT_CONFIG[agent_name]["system"]

    verifying = mode == "verify" and external_output
    if not verifying:
        seed = f"Research topic: {topic}"
    else:
        claim_excerpt = external_output[:500]
        seed = f'Research claim to verify:\n"{claim_excerpt}"\n\nOriginal topic: {topic}'

    # One "--- Agent ---" section per upstream agent; missing outputs become empty.
    prior_sections = []
    for upstream in CONTEXT_MAP[agent_name]:
        excerpt = history.get(upstream, "")[:500]
        prior_sections.append(f"\n\n--- {upstream} ---\n{excerpt}")
    prior = "".join(prior_sections)

    evidence_lines = []
    for doc in retrieve_for_agent(agent_name, query_emb, k=3):
        evidence_lines.append(f"β€’ {doc[:120]}")
    rag_text = "\n".join(evidence_lines)

    user = "".join([
        f"{seed}{prior}\n\n",
        f"Evidence from knowledge base:\n{rag_text}\n\n",
        "Respond in the format specified. Be concise.",
    ])
    return system, user
# ══════════════════════════════════════════════════════════════════════════
# SECTION 6 — RESPONSE PARSER
# ══════════════════════════════════════════════════════════════════════════
def parse_response(text: str) -> dict:
    """Extract the labeled fields from an agent reply.

    Args:
        text: Raw model output, expected to contain ``CLAIM:``,
            ``ARGUMENT 1..3:``, ``CONFIDENCE:``, optional ``TRUST_SCORE:``,
            ``INFLUENCED_BY:`` and ``POSITION_SHIFT:`` fields.

    Returns:
        dict with keys ``claim`` (str), ``arguments`` (non-empty list of str),
        ``confidence`` (int clamped to 1..10, default 5), ``influenced_by`` and
        ``position_shift`` (str, default "unknown") and ``trust_score``
        (int clamped to 0..100, or None when absent). When neither a claim nor
        any argument is found, the first non-blank line becomes the claim and
        the next three lines the arguments.
    """
    result = {"claim": "", "arguments": [], "confidence": 5,
              "influenced_by": "unknown", "position_shift": "unknown", "trust_score": None}
    flags = re.DOTALL | re.IGNORECASE

    # A field ends where the next "LABEL:" starts. Requiring the colon keeps
    # ordinary words such as "trust", "confidence" or "position" inside a
    # sentence from cutting the field short (matching is case-insensitive).
    label = r"\b(?:CLAIM|ARGUMENT\s*\d+|CONFIDENCE|TRUST_SCORE|INFLUENCED_BY|POSITION_SHIFT)\s*:"
    until_label = rf"(?={label}|\Z)"
    until_eol = rf"(?=\n|{label}|\Z)"

    def _get(pattern, default=""):
        m = re.search(pattern, text, flags)
        value = m.group(1).strip() if m else ""
        # An empty field is treated like a missing one.
        return value or default

    result["claim"] = _get(rf"\bCLAIM\s*:\s*(.*?){until_label}")
    for i in range(1, 4):
        arg = _get(rf"\bARGUMENT\s*{i}\s*:\s*(.*?){until_label}")
        if arg:
            result["arguments"].append(arg)

    conf = re.search(r"CONFIDENCE:\s*(\d+)", text, re.IGNORECASE)
    if conf:
        result["confidence"] = min(10, max(1, int(conf.group(1))))
    trust = re.search(r"TRUST_SCORE:\s*(\d+)", text, re.IGNORECASE)
    if trust:
        result["trust_score"] = min(100, max(0, int(trust.group(1))))

    # Single-line fields: only spaces/tabs are skipped after the colon so an
    # empty value cannot swallow the following line.
    result["influenced_by"] = _get(rf"\bINFLUENCED_BY\s*:[ \t]*(.*?){until_eol}", "unknown")
    result["position_shift"] = _get(r"\bPOSITION_SHIFT\s*:[ \t]*(.*?)(?=\n|\Z)", "unknown")

    if not result["claim"] and not result["arguments"]:
        # Unstructured reply: fall back to a line-based split.
        lines = [l.strip() for l in text.split("\n") if l.strip()]
        result["claim"] = lines[0] if lines else "No claim extracted"
        result["arguments"] = lines[1:4] if len(lines) > 1 else ["No structured arguments found"]
    if not result["arguments"]:
        result["arguments"] = ["No structured arguments found"]
    return result
# ══════════════════════════════════════════════════════════════════════════
# SECTION 7 — AGENT SCORER
# ══════════════════════════════════════════════════════════════════════════
def score_agent(raw_text: str, parsed: dict) -> dict:
    """Score one agent reply on five keyword-based dimensions, each capped at 10.

    Args:
        raw_text: The agent's raw output; keyword counts are taken on its
            lower-cased form.
        parsed: Parsed reply; ``parsed["arguments"]`` and ``parsed["claim"]``
            are read.

    Returns:
        dict with the keys "Logic", "Evidence", "Clarity", "Rigor", "Nuance".
    """
    lowered = raw_text.lower()
    arg_count = len(parsed["arguments"])

    def _keyword_score(base, weights):
        # base + sum(occurrences * weight), never above 10
        total = base
        for keyword, weight in weights:
            total += lowered.count(keyword) * weight
        return min(10, total)

    logic = min(10, 4 + arg_count * 2)
    evidence = _keyword_score(3, (("because", 2), ("evidence", 2), ("research", 1), ("study", 1)))

    clarity_raw = 5
    if parsed["claim"]:
        clarity_raw += 3
    if arg_count >= 2:
        clarity_raw += 2
    clarity = min(10, clarity_raw)

    rigor = _keyword_score(3, (("however", 2), ("although", 2), ("data", 1), ("source", 1)))
    nuance = _keyword_score(3, (("while", 2), ("despite", 1), ("context", 1), ("limitation", 1)))

    return {
        "Logic": logic,
        "Evidence": evidence,
        "Clarity": clarity,
        "Rigor": rigor,
        "Nuance": nuance,
    }
# ══════════════════════════════════════════════════════════════════════════
# SECTION 8 — PIPELINE
# ══════════════════════════════════════════════════════════════════════════
def run_pipeline(topic: str, external_output: str = None, mode: str = "research"):
    """Run the four agents in ``AGENT_ORDER`` and collect their outputs.

    Each agent's prompt is built from the topic, the earlier agents' raw
    replies and its retrieved evidence; the reply is then parsed, embedded and
    scored. After the last agent the run is handed to ``log_agent_run``.

    Returns:
        ``(history, parsed_all, raw_embs, scores, query_emb)`` where the first
        four are dicts keyed by agent name (raw text, parsed fields, reply
        embedding, rubric scores) and ``query_emb`` is the topic embedding.
    """
    history, parsed_all, raw_embs, scores = {}, {}, {}, {}

    print(" Computing topic embedding...")
    query_emb = encode_query(topic)

    for agent_name in AGENT_ORDER:
        display_name = AGENT_CONFIG[agent_name]["display"]
        print(f" Calling {display_name} via API...")

        system, user = build_prompt(agent_name, topic, history, mode, query_emb, external_output)
        raw = generate_response(agent_name, system, user)

        # Stored before the next iteration so later agents see this reply.
        history[agent_name] = raw
        agent_parsed = parse_response(raw)
        parsed_all[agent_name] = agent_parsed
        raw_embs[agent_name] = encode_single(raw)
        scores[agent_name] = score_agent(raw, agent_parsed)

        claim_preview = (agent_parsed['claim'] or '')[:80]
        print(f" Done: {claim_preview}")

    # Imported here rather than at module level, as in the original code.
    from scripts.model_versioning import log_agent_run
    log_agent_run(topic, mode, parsed_all, scores)

    return history, parsed_all, raw_embs, scores, query_emb
# ══════════════════════════════════════════════════════════════════════════
# SECTION 9 — xAI LAYER
# ══════════════════════════════════════════════════════════════════════════
def compute_xai(history, parsed, embeddings, topic, mode, query_emb, external_output=None):
    """Compute the explainability metrics for one pipeline run.

    Args:
        history: Raw reply per agent.
        parsed: Parsed reply per agent (output of ``parse_response``).
        embeddings: Reply embedding per agent. Scores are plain dot products,
            i.e. cosine similarities when the vectors are the normalized ones
            produced by ``encode_single``.
        topic, mode, query_emb, external_output: Forwarded to ``build_prompt``
            for the counterfactual Synthesizer run.

    Returns:
        dict with ``influence_matrix`` (pairwise reply similarity),
        ``argument_attribution`` (Proposer arguments vs. Opposer reply, plus
        agent vs. Critic/Synthesizer similarity), ``counterfactual_scores``
        (1 - similarity between the Synthesizer reply with and without the
        Critic in its context), ``position_shifts`` and ``position_timeline``.

    Every individual score is best-effort: a failure yields 0.0 or a missing
    entry rather than aborting the whole computation.
    """
    xai = {"influence_matrix": {}, "argument_attribution": {},
           "counterfactual_scores": {}, "position_shifts": {}, "position_timeline": {}}

    pairs = [("Proposer", "Opposer"), ("Proposer", "Critic"), ("Proposer", "Synthesizer"),
             ("Opposer", "Critic"), ("Opposer", "Synthesizer"), ("Critic", "Synthesizer")]
    for src, tgt in pairs:
        # `except Exception` instead of a bare except so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            sim = float(np.dot(embeddings[src], embeddings[tgt]))
        except Exception:
            sim = 0.0
        xai["influence_matrix"][f"{src} β†’ {tgt}"] = round(sim, 3)

    opp_emb = embeddings.get("Opposer")
    if opp_emb is not None:
        for i, arg in enumerate(parsed.get("Proposer", {}).get("arguments", [])):
            if not arg:
                continue
            try:
                ae = encode_single(arg)
                xai["argument_attribution"][f"Prop Arg {i+1}: {arg[:40]}..."] = round(float(np.dot(ae, opp_emb)), 3)
            except Exception:
                pass

    ce = embeddings.get("Critic")
    se = embeddings.get("Synthesizer")
    for src in ["Proposer", "Opposer"]:
        if ce is not None and src in embeddings:
            try:
                xai["argument_attribution"][f"{src} β†’ Critic"] = round(float(np.dot(embeddings[src], ce)), 3)
            except Exception:
                pass
    for src in ["Proposer", "Opposer", "Critic"]:
        if se is not None and src in embeddings:
            try:
                xai["argument_attribution"][f"{src} β†’ Synth"] = round(float(np.dot(embeddings[src], se)), 3)
            except Exception:
                pass

    # Counterfactual: re-run Synthesizer without Critic
    try:
        print(" Counterfactual: Synthesizer without Critic...")
        cf_hist = {k: history[k] for k in ["Proposer", "Opposer"] if k in history}
        sys_cf, user_cf = build_prompt("Synthesizer", topic, cf_hist, mode, query_emb, external_output)
        cf_raw = generate_response("Synthesizer", sys_cf, user_cf)
        cf_emb = encode_single(cf_raw)
        causal = 1.0 - float(np.dot(cf_emb, se)) if se is not None else 0.0
        xai["counterfactual_scores"]["Critic β†’ Synthesizer (causal)"] = round(causal, 3)
    except Exception as e:
        print(f" Counterfactual failed: {e}")
        xai["counterfactual_scores"]["Critic β†’ Synthesizer (causal)"] = 0.0

    for a in ["Opposer", "Critic", "Synthesizer"]:
        if a in parsed:
            xai["position_shifts"][a] = {
                "influenced_by": parsed[a].get("influenced_by", "unknown"),
                "position_shift": parsed[a].get("position_shift", "unknown"),
                "confidence": parsed[a].get("confidence", 5),
            }
    xai["position_timeline"] = {a: parsed[a].get("confidence", 5) for a in AGENT_ORDER if a in parsed}
    return xai
# ══════════════════════════════════════════════════════════════════════════
# SECTION 10 — EXPORT HELPERS
# ══════════════════════════════════════════════════════════════════════════
def build_export_json(topic, mode, parsed, xai, sources) -> str:
    """Serialize one run as an indented JSON report.

    Args:
        topic: Research topic.
        mode: Run mode string.
        parsed: Parsed reply per agent; only agents in ``AGENT_ORDER`` are exported.
        xai: Output of ``compute_xai``; the influence matrix, argument
            attribution and counterfactual scores are exported.
        sources: Retrieved-evidence record per agent, exported unchanged.

    Returns:
        JSON text with a single top-level key ``multi_agent_system_report``.
    """
    # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop the
    # tzinfo so the text stays "<naive ISO timestamp>Z" exactly as before.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    agents = {}
    for a in AGENT_ORDER:
        if a not in parsed:
            continue
        p = parsed[a]
        agents[a] = {
            "claim": p.get("claim", ""),
            "arguments": p.get("arguments", []),
            "confidence": p.get("confidence", 5),
            "influenced_by": p.get("influenced_by", ""),
            "position_shift": p.get("position_shift", ""),
            "trust_score": p.get("trust_score"),
        }

    export = {
        "multi_agent_system_report": {
            "timestamp": now_utc.isoformat() + "Z",
            "model": MISTRAL_MODEL,
            "topic": topic,
            "mode": mode,
            "agents": agents,
            "xai": {
                "influence_matrix": xai.get("influence_matrix", {}),
                "argument_attribution": xai.get("argument_attribution", {}),
                "counterfactual_scores": xai.get("counterfactual_scores", {}),
            },
            "evidence_sources": sources,
        }
    }
    return json.dumps(export, indent=2)
def build_export_text(topic, mode, parsed, xai) -> str:
    """Render one run as a plain-text report.

    The report has a header (topic, mode, model, UTC timestamp), one section
    per agent present in *parsed* (in ``AGENT_ORDER``), and the influence
    matrix and counterfactual scores from *xai* formatted to three decimals.

    Returns:
        The report lines joined with newlines.
    """
    # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop the
    # tzinfo so the printed value keeps its "<naive ISO timestamp>Z" form.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    lines = ["="*60, "MULTI AGENT SYSTEM β€” RESEARCH VERIFICATION REPORT",
             f"Topic: {topic}", f"Mode: {mode}", f"Model: {MISTRAL_MODEL}",
             f"Timestamp: {now_utc.isoformat()}Z", "="*60, ""]
    for a in AGENT_ORDER:
        if a not in parsed:
            continue
        p = parsed[a]
        lines += [f"[ {a.upper()} ]", f"Claim: {p.get('claim','')}"]
        for i, arg in enumerate(p.get("arguments", []), 1):
            lines.append(f" Argument {i}: {arg}")
        lines += [f" Confidence: {p.get('confidence',5)}/10",
                  f" Influenced by: {p.get('influenced_by','')}",
                  f" Position shift: {p.get('position_shift','')}"]
        # Only agents that reported a trust score get this line.
        if p.get("trust_score") is not None:
            lines.append(f" TRUST SCORE: {p['trust_score']}/100")
        lines.append("")
    lines += ["[ xAI β€” INFLUENCE ATTRIBUTION ]"]
    for k, v in xai.get("influence_matrix", {}).items():
        lines.append(f" {k}: {v:.3f}")
    lines += ["", "[ xAI β€” COUNTERFACTUAL ]"]
    for k, v in xai.get("counterfactual_scores", {}).items():
        lines.append(f" {k}: {v:.3f}")
    return "\n".join(lines)
# ══════════════════════════════════════════════════════════════════════════
# SECTION 11 — CHART BUILDERS
# ══════════════════════════════════════════════════════════════════════════
# Background and text colours shared by all charts.
BG = "#0a0f1e"; TC = "#c9d1e0"
# Base layout keyword arguments unpacked into every figure's update_layout call.
LB = dict(paper_bgcolor=BG, plot_bgcolor=BG,
font=dict(color=TC, family="DM Mono, monospace"),
margin=dict(l=40, r=40, t=50, b=40))
def _ef(title=""):
    """Return an empty figure carrying only *title* and the shared base layout."""
    placeholder = go.Figure()
    placeholder.update_layout(title=title, **LB)
    return placeholder
def chart_influence_matrix(im):
    """Draw the pairwise influence scores as an agent-by-agent heatmap.

    Args:
        im: Mapping of ``"<source> β†’ <target>"`` labels to scores, as built
            by ``compute_xai``. Labels that do not split into two known agent
            names are ignored.

    Returns:
        A heatmap figure with rows as sources and columns as targets, or an
        empty titled figure when *im* is empty.
    """
    if not im:
        return _ef("Influence Attribution Matrix")
    # Size the matrix from AGENT_ORDER instead of a hard-coded 4x4 so the
    # chart keeps working if the list of agents changes.
    position = {name: idx for idx, name in enumerate(AGENT_ORDER)}
    size = len(AGENT_ORDER)
    mat = np.zeros((size, size))
    for pair, score in im.items():
        parts = pair.split(" β†’ ")
        if len(parts) == 2 and parts[0] in position and parts[1] in position:
            mat[position[parts[0]]][position[parts[1]]] = score
    rounded = np.round(mat, 2)
    fig = go.Figure(go.Heatmap(z=rounded, x=AGENT_ORDER, y=AGENT_ORDER,
                               colorscale=[[0, "#0a0f1e"], [0.5, "#1e3a5f"], [1, "#22c55e"]],
                               zmin=0, zmax=1, text=rounded,
                               texttemplate="%{text}", showscale=True))
    fig.update_layout(title="Influence Attribution Matrix", **LB)
    return fig
def chart_argument_attribution(arg_scores):
    """Draw the argument-attribution scores as a bar chart.

    Bars are coloured by what their label mentions, checked in this order:
    "Prop Arg" (green), "Critic" (blue), "Synth" (amber), anything else (red).
    An empty *arg_scores* yields an empty titled figure.
    """
    if not arg_scores:
        return _ef("Argument Attribution")

    def _bar_color(label):
        if "Prop Arg" in label:
            return "#22c55e"
        if "Critic" in label:
            return "#3b82f6"
        if "Synth" in label:
            return "#f59e0b"
        return "#ef4444"

    labels = list(arg_scores.keys())
    values = list(arg_scores.values())
    bar = go.Bar(
        x=labels,
        y=values,
        marker_color=[_bar_color(label) for label in labels],
        text=[f"{v:.2f}" for v in values],
        textposition="outside",
    )
    fig = go.Figure(bar)
    fig.update_layout(
        title="Argument Attribution Scores",
        yaxis=dict(range=[0, 1.15], title="Cosine Similarity"),
        xaxis=dict(tickangle=-30),
        **LB,
    )
    return fig
def chart_counterfactual(cf_scores):
    """Draw the counterfactual scores as a bar chart.

    The y-axis runs from 0 to 1.4 times the largest score, with a minimum
    upper bound of 0.1. An empty *cf_scores* yields an empty titled figure.
    """
    if not cf_scores:
        return _ef("Counterfactual Causal Influence")

    names = list(cf_scores.keys())
    scores = list(cf_scores.values())
    y_top = max(max(scores) * 1.4, 0.1)

    fig = go.Figure(
        go.Bar(
            x=names,
            y=scores,
            marker_color=["#3b82f6"],
            text=[f"{score:.3f}" for score in scores],
            textposition="outside",
        )
    )
    fig.update_layout(
        title="Counterfactual: Critic's Causal Impact",
        yaxis=dict(range=[0, y_top], title="Causal Influence Score"),
        **LB,
    )
    return fig
def chart_timeline(timeline):
    """Plot each agent's confidence in pipeline order.

    A dotted line connects all points; on top of it every agent that has an
    entry in ``COLOR_MAP`` gets its own coloured, labelled marker. An empty
    *timeline* yields an empty titled figure.
    """
    if not timeline:
        return _ef("Confidence Timeline")

    fig = go.Figure()
    connector = go.Scatter(
        x=list(timeline.keys()),
        y=list(timeline.values()),
        mode="lines",
        line={"color": "#1e3a5f", "dash": "dot"},
        showlegend=False,
    )
    fig.add_trace(connector)

    for agent, confidence in timeline.items():
        if agent in COLOR_MAP:
            hex_color = COLOR_MAP[agent][1]
            fig.add_trace(go.Scatter(
                x=[agent],
                y=[confidence],
                mode="markers+text",
                marker={"size": 14, "color": hex_color},
                text=[str(confidence)],
                textposition="top center",
                name=agent,
            ))

    fig.update_layout(
        title="Agent Confidence Across Pipeline",
        yaxis=dict(range=[0, 12], title="Confidence /10"),
        **LB,
    )
    return fig
def chart_radar(all_scores):
    """Draw one radar polygon per agent over the ``SCORE_DIMS`` axes.

    Args:
        all_scores: Mapping of agent name to its score dict (see
            ``score_agent``). Agents without a ``COLOR_MAP`` entry are skipped.

    Returns:
        A polar figure, or an empty titled figure when *all_scores* is empty.
    """
    if not all_scores:
        return _ef("Agent Rigor Radar")
    # Repeat the first axis at the end so each polygon is closed.
    cats = SCORE_DIMS + [SCORE_DIMS[0]]
    fig = go.Figure()
    for name, scores in all_scores.items():
        if name not in COLOR_MAP:
            continue
        rgb, hx = COLOR_MAP[name]
        # Read values by dimension name (as chart_heatmap does) rather than by
        # dict order, so a reordered or partial score dict still lines up with
        # the axes and an empty one no longer raises IndexError.
        dim_vals = [scores.get(d, 0) for d in SCORE_DIMS]
        vals = dim_vals + dim_vals[:1]
        fig.add_trace(go.Scatterpolar(r=vals, theta=cats, fill="toself", name=name,
                                      line=dict(color=hx, width=2),
                                      fillcolor=f"rgba({rgb},0.12)"))
    fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 10])),
                      title="Agent Rigor Radar", legend=dict(bgcolor=BG), **LB)
    return fig
def chart_confidence(parsed):
    """Draw each agent's self-reported confidence as a coloured bar.

    Only agents from ``AGENT_ORDER`` that appear in *parsed* are shown; a
    missing confidence value counts as 5. With no such agent an empty titled
    figure is returned.
    """
    present = [name for name in AGENT_ORDER if name in parsed]
    if not present:
        return _ef("Agent Confidence")

    confidences = [parsed[name].get("confidence", 5) for name in present]
    bar_colors = [COLOR_MAP[name][1] for name in present]

    fig = go.Figure(
        go.Bar(
            x=present,
            y=confidences,
            marker_color=bar_colors,
            text=confidences,
            textposition="outside",
        )
    )
    fig.update_layout(
        title="Agent Self-Reported Confidence",
        yaxis=dict(range=[0, 12], title="Confidence /10"),
        **LB,
    )
    return fig
def chart_heatmap(all_scores):
    """Draw the rubric scores as an agent-by-dimension heatmap.

    Rows are the agents of ``AGENT_ORDER`` present in *all_scores*, columns
    are ``SCORE_DIMS``; a missing dimension counts as 0. With no such agent an
    empty titled figure is returned.
    """
    rows = [name for name in AGENT_ORDER if name in all_scores]
    if not rows:
        return _ef("Rigor Heatmap")

    grid = []
    for name in rows:
        agent_scores = all_scores[name]
        grid.append([agent_scores.get(dim, 0) for dim in SCORE_DIMS])

    heat = go.Heatmap(
        z=grid,
        x=SCORE_DIMS,
        y=rows,
        colorscale="YlGnBu",
        zmin=0,
        zmax=10,
        text=grid,
        texttemplate="%{text}",
        showscale=True,
    )
    fig = go.Figure(heat)
    fig.update_layout(title="Agent Rigor Heatmap", **LB)
    return fig
# ══════════════════════════════════════════════════════════════════════════
# SECTION 12 — HTML BUILDERS
# ══════════════════════════════════════════════════════════════════════════
# Stylesheet for the results page. build_results_html wraps it in a <style> tag
# and prepends it to the markup; the class names below (trust-banner,
# agent-card, xai-panel, sources-panel, section-divider, ...) are the ones that
# function emits.
PRODUCT_CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500&display=swap');
:root{--bg:#0a0f1e;--surface:#0f1729;--border:#1a2744;--text:#c9d1e0;--muted:#4a5568;
--green:#22c55e;--red:#ef4444;--blue:#3b82f6;--amber:#f59e0b;--purple:#a855f7}
body,.gradio-container{background:var(--bg)!important}
.mas-header{padding:3rem 2rem 2rem;border-bottom:1px solid var(--border);position:relative;overflow:hidden}
.mas-header::before{content:'';position:absolute;inset:0;
background:radial-gradient(ellipse 80% 60% at 50% -20%,rgba(34,197,94,.06),transparent);pointer-events:none}
.mas-wordmark{font-family:'DM Serif Display',serif;font-size:3rem;letter-spacing:-.02em;
color:#e8edf5;line-height:1;margin-bottom:.4rem}
.mas-wordmark em{font-style:italic;color:var(--green)}
.mas-tagline{font-family:'DM Mono',monospace;font-size:.72rem;letter-spacing:.2em;
color:var(--muted);text-transform:uppercase}
.mas-desc{font-family:'DM Sans',sans-serif;font-size:.95rem;color:#7a8ba0;
margin-top:1rem;max-width:600px;line-height:1.6}
.agent-grid{display:grid;grid-template-columns:1fr 1fr;gap:1rem;margin:1.5rem 0}
@media(max-width:768px){.agent-grid{grid-template-columns:1fr}}
.agent-card{background:var(--surface);border:1px solid var(--border);border-radius:12px;
padding:1.4rem;position:relative;overflow:hidden}
.agent-card::before{content:'';position:absolute;top:0;left:0;right:0;height:2px}
.agent-card.proposer::before{background:var(--green)}
.agent-card.opposer::before{background:var(--red)}
.agent-card.critic::before{background:var(--blue)}
.agent-card.synthesizer::before{background:var(--amber)}
.agent-label{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.18em;
text-transform:uppercase;margin-bottom:.6rem;display:flex;align-items:center;gap:.5rem}
.agent-dot{width:7px;height:7px;border-radius:50%;display:inline-block}
.agent-claim{font-family:'DM Serif Display',serif;font-size:1.05rem;color:#d8e0ed;
line-height:1.45;margin-bottom:1rem;font-style:italic}
.agent-args{list-style:none;padding:0;margin:0 0 1rem}
.agent-args li{font-family:'DM Sans',sans-serif;font-size:.875rem;color:var(--text);
padding:.45rem 0 .45rem 1rem;border-bottom:1px solid rgba(255,255,255,.04);
line-height:1.5;position:relative}
.agent-args li::before{content:'β€”';position:absolute;left:0;color:var(--muted)}
.agent-meta{display:flex;gap:.6rem;flex-wrap:wrap;margin-top:.8rem}
.meta-chip{font-family:'DM Mono',monospace;font-size:.65rem;padding:2px 10px;border-radius:100px;
background:rgba(255,255,255,.04);color:var(--muted);border:1px solid var(--border);letter-spacing:.05em}
.trust-banner{background:var(--surface);border:1px solid var(--border);border-radius:16px;
padding:2.5rem 2rem;text-align:center;margin:1.5rem 0}
.trust-score-num{font-family:'DM Serif Display',serif;font-size:5rem;line-height:1;font-weight:400}
.trust-label{font-family:'DM Mono',monospace;font-size:.7rem;letter-spacing:.2em;
text-transform:uppercase;color:var(--muted);margin-top:.3rem}
.trust-verdict{font-family:'DM Serif Display',serif;font-size:1.1rem;color:#c9d1e0;
margin-top:1.2rem;font-style:italic;max-width:600px;margin-left:auto;margin-right:auto;line-height:1.5}
.trust-meter{width:240px;height:8px;background:var(--border);border-radius:100px;margin:1rem auto 0;overflow:hidden}
.trust-fill{height:100%;border-radius:100px}
.xai-panel{background:var(--surface);border:1px solid var(--border);border-left:3px solid var(--purple);
border-radius:12px;padding:1.4rem;margin:1rem 0}
.xai-title{font-family:'DM Mono',monospace;font-size:.7rem;letter-spacing:.18em;
text-transform:uppercase;color:var(--purple);margin-bottom:1rem}
.xai-row{display:flex;justify-content:space-between;align-items:center;
padding:.4rem 0;border-bottom:1px solid rgba(255,255,255,.03)}
.xai-key{font-family:'DM Mono',monospace;font-size:.75rem;color:var(--text)}
.xai-val{font-family:'DM Mono',monospace;font-size:.8rem;color:var(--green);font-weight:500}
.sources-panel{background:rgba(10,15,30,.8);border:1px solid var(--border);border-radius:12px;padding:1.4rem;margin:1rem 0}
.sources-title{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.18em;
text-transform:uppercase;color:var(--muted);margin-bottom:1rem}
.source-item{margin-bottom:1rem}
.source-agent{font-family:'DM Mono',monospace;font-size:.7rem;color:var(--blue);margin-bottom:.3rem}
.source-snippet{font-family:'DM Sans',sans-serif;font-size:.8rem;color:#5a6a7e;line-height:1.5;
padding-left:.8rem;border-left:2px solid var(--border)}
.section-divider{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.2em;text-transform:uppercase;
color:var(--muted);text-align:center;padding:1.5rem 0 .5rem;
display:flex;align-items:center;gap:1rem}
.section-divider::before,.section-divider::after{content:'';flex:1;height:1px;background:var(--border)}
"""
def _tc(s):
if s is None: return "#4a5568"
if s>=75: return "#22c55e"
if s>=50: return "#f59e0b"
if s>=25: return "#ef4444"
return "#7f1d1d"
def _tl(s):
if s is None: return "UNSCORED"
if s>=75: return "WELL SUPPORTED"
if s>=50: return "CONTESTED"
if s>=25: return "WEAKLY SUPPORTED"
return "UNSUPPORTED"
def build_results_html(topic, mode, parsed, xai, sources):
    """Render the results page: trust banner, agent cards, xAI panel, sources.

    Args:
        topic: Research topic entered by the user (first 60 characters shown).
        mode: "verify" selects the claim-verification heading, anything else
            the research-analysis heading.
        parsed: Parsed reply per agent; the Synthesizer's claim and trust
            score drive the banner.
        xai: Output of ``compute_xai``.
        sources: Per-agent retrieval record with ``source_label`` and ``snippets``.

    Returns:
        One HTML string starting with a ``<style>`` block holding ``PRODUCT_CSS``.

    All text that originates from the user, the model or the retrieved
    documents is HTML-escaped before it is placed in the markup.
    """
    from html import escape

    def esc(value):
        # Topic, model output and dataset snippets are untrusted text.
        return escape(str(value))

    synth = parsed.get("Synthesizer", {})
    score = synth.get("trust_score")
    color = _tc(score)
    label = _tl(score)
    score_disp = str(score) if score is not None else "β€”"
    fill_pct = score if score is not None else 0
    verdict = esc(synth.get("claim", "No verdict extracted."))
    mode_label = "CLAIM VERIFICATION" if mode=="verify" else "RESEARCH ANALYSIS"
    topic_disp = esc(topic[:60].upper())
    trust_html = f"""
<div class="trust-banner" style="border-color:{color}30">
<div style="font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.2em;
color:var(--muted);text-transform:uppercase;margin-bottom:.5rem">
{mode_label} Β· {topic_disp}
</div>
<div class="trust-score-num" style="color:{color}">{score_disp}</div>
<div style="font-family:'DM Mono',monospace;font-size:.6rem;color:var(--muted);letter-spacing:.1em">/ 100</div>
<div class="trust-label" style="color:{color}">{label}</div>
<div class="trust-meter"><div class="trust-fill" style="width:{fill_pct}%;background:{color}"></div></div>
<div class="trust-verdict">"{verdict}"</div>
</div>"""

    card_cls = {"Proposer":"proposer","Opposer":"opposer","Critic":"critic","Synthesizer":"synthesizer"}
    cards = '<div class="agent-grid">'
    for a in AGENT_ORDER:
        if a not in parsed:
            continue
        p = parsed[a]; cfg = AGENT_CONFIG[a]; cls = card_cls[a]
        args_html = "".join(f"<li>{esc(arg)}</li>" for arg in p.get("arguments",[]))
        claim_disp = esc(p.get('claim','No claim extracted.'))
        shift_disp = esc(p.get('position_shift','β€”'))
        influenced_disp = esc(p.get('influenced_by','β€”'))
        trust_line = ""
        if p.get("trust_score") is not None:
            trust_line = f'<div style="font-family:DM Mono,monospace;font-size:.75rem;color:{_tc(p["trust_score"])};margin-top:.5rem">Trust Score: {p["trust_score"]}/100</div>'
        cards += f"""
<div class="agent-card {cls}">
<div class="agent-label">
<span class="agent-dot" style="background:{cfg['color']}"></span>
<span style="color:{cfg['color']}">{a}</span>
<span style="color:var(--muted)">β€” {cfg['role_desc']}</span>
</div>
<div class="agent-claim">"{claim_disp}"</div>
<ul class="agent-args">{args_html}</ul>
{trust_line}
<div class="agent-meta">
<span class="meta-chip" style="color:{cfg['color']}">conf {p.get('confidence',5)}/10</span>
<span class="meta-chip">shifted: {shift_disp}</span>
<span class="meta-chip">influenced: {influenced_disp}</span>
</div>
</div>"""
    cards += "</div>"

    inf = xai.get("influence_matrix",{}); cf = xai.get("counterfactual_scores",{})
    arg_attr = xai.get("argument_attribution",{})
    top_inf = max(inf, key=inf.get) if inf else "N/A"
    top_score = inf.get(top_inf, 0) if inf else 0
    # Attribution labels embed the first 40 characters of a model-written argument.
    top_arg = max(arg_attr, key=arg_attr.get) if arg_attr else "N/A"
    inf_rows = "".join(f'<div class="xai-row"><span class="xai-key">{esc(k)}</span><span class="xai-val">{v:.3f}</span></div>' for k,v in inf.items())
    cf_rows = "".join(f'<div class="xai-row"><span class="xai-key">{esc(k)}</span><span class="xai-val">{v:.3f}</span></div>' for k,v in cf.items())
    xai_html = f"""
<div class="xai-panel">
<div class="xai-title">xAI β€” Influence Attribution Layer</div>
<div style="font-family:'DM Sans',sans-serif;font-size:.85rem;color:#7a8ba0;margin-bottom:1rem">
Strongest influence: <strong style="color:#e8edf5">{esc(top_inf)}</strong> ({top_score:.3f}) Β·
Most attributed argument: <strong style="color:#e8edf5">{esc(top_arg)}</strong>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:1.5rem">
<div>
<div style="font-family:'DM Mono',monospace;font-size:.65rem;color:var(--muted);
letter-spacing:.15em;text-transform:uppercase;margin-bottom:.5rem">Semantic Influence Matrix</div>
{inf_rows}
</div>
<div>
<div style="font-family:'DM Mono',monospace;font-size:.65rem;color:var(--muted);
letter-spacing:.15em;text-transform:uppercase;margin-bottom:.5rem">Counterfactual Causal Score</div>
{cf_rows}
</div>
</div>
</div>"""

    src_items = ""
    for a, data in sources.items():
        snippets = "".join(f'<div class="source-snippet">"{esc(s)}..."</div>' for s in data.get("snippets",[]))
        src_items += f'<div class="source-item"><div class="source-agent">{esc(a)} ← {esc(data.get("source_label",""))}</div>{snippets}</div>'
    sources_html = f"""
<div class="sources-panel">
<div class="sources-title">Evidence Transparency β€” Retrieved Sources</div>
{src_items}
</div>"""
    return f"<style>{PRODUCT_CSS}</style>{trust_html}<div class='section-divider'>Agent Analysis</div>{cards}<div class='section-divider'>Explainability Layer</div>{xai_html}<div class='section-divider'>Evidence Sources</div>{sources_html}"
# ══════════════════════════════════════════════════════════════════════════
# SECTION 13 — ORCHESTRATOR
# ══════════════════════════════════════════════════════════════════════════
@monitor_request
def run_mas(topic: str, external_output: str, tab_mode: str):
    """Run one end-to-end analysis request and package it for the UI.

    Validates the inputs, runs the agent pipeline and the xAI computation,
    then builds the HTML report, both exports and the seven charts.

    Args:
        topic: Research topic or question. Blank (or None) is rejected.
        external_output: Claim / AI output to audit. Required only when
            ``tab_mode`` selects verify mode; ignored otherwise.
        tab_mode: ``"Verify Claim / AI Output"`` selects verify mode; any
            other value selects research mode.

    Returns:
        An 11-tuple: (results HTML, JSON export, text export, status text,
        influence chart, argument-attribution chart, counterfactual chart,
        timeline chart, radar chart, confidence chart, heatmap chart).
        On a validation failure or any exception the first slot carries an
        error paragraph, the exports are empty strings and the seven chart
        slots are filled with ``_ef()`` values.
    """
    import html  # local import; only needed to escape exception text

    verify_tab = "Verify Claim / AI Output"
    error_style = "color:#ef4444;font-family:DM Mono,monospace"

    def _failure(message_html: str, status: str = ""):
        # Same 11-slot shape as the success path. Placeholders are built
        # only when actually needed instead of on every call.
        return (message_html, "", "", status) + tuple(_ef() for _ in range(7))

    # Guard against None so validation answers with the friendly message
    # instead of raising AttributeError on .strip().
    topic_text = (topic or "").strip()
    claim_text = (external_output or "").strip()

    if not topic_text:
        return _failure(f"<p style='{error_style}'>⚠ Enter a research topic.</p>")
    if tab_mode == verify_tab and not claim_text:
        return _failure(
            f"<p style='{error_style}'>⚠ Paste the claim or AI output to verify.</p>"
        )

    mode = "verify" if tab_mode == verify_tab else "research"
    ext = claim_text if mode == "verify" else None
    print(f"\n{'='*60}\nMulti Agent System | Mode: {mode} | Topic: {topic}\n{'='*60}")

    try:
        # _last_sources is module-level state shared across requests; it is
        # reset here and snapshotted (dict(...)) after the pipeline has run.
        _last_sources.clear()
        history, parsed, embeddings, scores, query_emb = run_pipeline(topic, ext, mode)
        xai = compute_xai(history, parsed, embeddings, topic, mode, query_emb, ext)
        results_html = build_results_html(topic, mode, parsed, xai, dict(_last_sources))
        json_out = build_export_json(topic, mode, parsed, xai, dict(_last_sources))
        text_out = build_export_text(topic, mode, parsed, xai)
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted text is the same.
        finished_at = datetime.datetime.now(datetime.timezone.utc).strftime('%H:%M:%S UTC')
        return (
            results_html, json_out, text_out,
            f"✓ Analysis complete — {finished_at}",
            chart_influence_matrix(xai["influence_matrix"]),
            chart_argument_attribution(xai["argument_attribution"]),
            chart_counterfactual(xai["counterfactual_scores"]),
            chart_timeline(xai["position_timeline"]),
            chart_radar(scores),
            chart_confidence(parsed),
            chart_heatmap(scores),
        )
    except Exception as exc:
        # UI boundary: keep the full traceback in the server log and show a
        # short message. The text is escaped because exception messages can
        # contain '<' or '&', which would otherwise break the HTML output.
        import traceback
        traceback.print_exc()
        return _failure(
            f"<p style='{error_style}'>Error: {html.escape(str(exc))}</p>",
            "Error",
        )
# ══════════════════════════════════════════════════════════════════════════
# SECTION 14 β€” GRADIO UI
# ══════════════════════════════════════════════════════════════════════════
# One-click example prompts for the "Research Topic" tab (one string per
# example, fed to a single textbox).
SAMPLE_TOPICS = [
    "The long-term effects of social media on adolescent mental health",
    "Whether large language models can achieve genuine reasoning",
    "The effectiveness of universal basic income programs",
    "Genetic engineering in agriculture: risks and benefits",
    "The causal relationship between economic inequality and crime",
]
# One-click examples for the "Verify Claim / AI Output" tab. Each row is
# [topic/context, claim text], matching the two textboxes it populates.
# The dash in the second claim is a real em dash; the mis-decoded byte
# sequence that stood in its place would otherwise be passed on as claim text.
SAMPLE_CLAIMS = [
    [
        "Social media and mental health",
        "Social media use is directly correlated with increased rates of depression and anxiety in teenagers, as evidenced by multiple longitudinal studies.",
    ],
    [
        "LLM reasoning capabilities",
        "Current large language models do not perform genuine logical reasoning — they pattern-match on training data and cannot reliably solve novel problems.",
    ],
    [
        "Universal basic income",
        "UBI pilot programs have consistently shown that unconditional cash transfers reduce poverty without reducing workforce participation.",
    ],
]
# Close any Gradio apps still running in this process before building a new one.
gr.close_all()

# Page layout: header banner, two input tabs sharing one set of outputs,
# results panel, xAI charts, and export widgets. Component creation order
# inside the Blocks context determines on-page order.
# User-facing text below uses the real characters (em dash, middle dot,
# diamond) in place of the mis-decoded byte sequences.
with gr.Blocks(theme=gr.themes.Base(), css=PRODUCT_CSS + """
.gradio-container{max-width:1100px!important;margin:0 auto}
button.primary{background:#22c55e!important;color:#0a0f1e!important;
font-family:'DM Mono',monospace!important;font-size:.8rem!important;
letter-spacing:.1em!important;border-radius:8px!important}
.gr-form,.gr-box{background:#0f1729!important;border-color:#1a2744!important}
label{font-family:'DM Mono',monospace!important;font-size:.72rem!important;
letter-spacing:.1em!important;color:#4a5568!important;text-transform:uppercase!important}
textarea,input{background:#080d1a!important;border-color:#1a2744!important;
color:#c9d1e0!important;font-family:'DM Sans',sans-serif!important}
""") as demo:
    # Header banner with product name, tagline and the four agent badges.
    gr.HTML(f"""
<div class="mas-header">
<div class="mas-wordmark">Multi <em>Agent</em> System</div>
<div class="mas-tagline">Research Verification · Open Source · Powered by Mistral API ({MISTRAL_MODEL})</div>
<div class="mas-desc">
Submit a research topic or claim. Four independent AI agents — Proposer, Opposer,
Critic, and Synthesizer — analyse it using RAG-grounded evidence and produce a
calibrated, bias-audited verdict with full xAI transparency.
</div>
<div style="display:flex;gap:1rem;margin-top:1.5rem;flex-wrap:wrap">
<div style="font-family:'DM Mono',monospace;font-size:.68rem;color:#2d6a4f;background:rgba(34,197,94,.08);border:1px solid rgba(34,197,94,.2);padding:4px 12px;border-radius:100px">◆ Proposer — evidence-based support</div>
<div style="font-family:'DM Mono',monospace;font-size:.68rem;color:#7f1d1d;background:rgba(239,68,68,.08);border:1px solid rgba(239,68,68,.2);padding:4px 12px;border-radius:100px">◆ Opposer — evidence-based refutation</div>
<div style="font-family:'DM Mono',monospace;font-size:.68rem;color:#1e40af;background:rgba(59,130,246,.08);border:1px solid rgba(59,130,246,.2);padding:4px 12px;border-radius:100px">◆ Critic — logical consistency audit</div>
<div style="font-family:'DM Mono',monospace;font-size:.68rem;color:#78350f;background:rgba(245,158,11,.08);border:1px solid rgba(245,158,11,.2);padding:4px 12px;border-radius:100px">◆ Synthesizer — calibrated final verdict</div>
</div>
</div>
""")

    with gr.Tabs():
        # Tab 1: free-form research topic (no external claim).
        with gr.TabItem("Research Topic"):
            topic_input = gr.Textbox(
                placeholder="e.g. The long-term effects of social media on adolescent mental health",
                label="Research Topic or Question", lines=2
            )
            gr.Examples(examples=SAMPLE_TOPICS, inputs=topic_input, label="Example Topics")
            # Constant inputs passed to run_mas for this tab: the mode label
            # and an empty external claim.
            tab_mode_1 = gr.State("Research Topic")
            ext_1 = gr.State("")
            run_btn_1 = gr.Button("Run Analysis", variant="primary", size="lg")

        # Tab 2: audit a pasted claim / AI output against a topic.
        with gr.TabItem("Verify Claim / AI Output"):
            gr.HTML('<div style="font-family:DM Sans,sans-serif;font-size:.875rem;color:#7a8ba0;padding:.75rem 0">Paste any claim, AI-generated text, or research statement. The system will audit its accuracy and bias.</div>')
            topic_input_2 = gr.Textbox(placeholder="e.g. Effects of social media on mental health",
                                       label="Topic / Context of the Claim", lines=1)
            claim_input = gr.Textbox(placeholder="Paste the claim or AI-generated text to verify...",
                                     label="Claim or AI Output to Verify", lines=4)
            gr.Examples(examples=SAMPLE_CLAIMS, inputs=[topic_input_2, claim_input], label="Example Claims")
            # This label must match the string run_mas compares against to
            # select verify mode.
            tab_mode_2 = gr.State("Verify Claim / AI Output")
            run_btn_2 = gr.Button("Verify This Claim", variant="primary", size="lg")

    # Shared status line and result panel, used by both tabs.
    status_bar = gr.Textbox(value="", label="", interactive=False,
                            placeholder="Ready — enter a topic and click Run")
    gr.HTML('<div class="section-divider">Results</div>')
    results_output = gr.HTML()

    gr.HTML('<div class="section-divider">xAI Visualisation</div>')
    with gr.Row():
        influence_plot = gr.Plot(label="Influence Matrix")
        arg_attr_plot = gr.Plot(label="Argument Attribution")
    with gr.Row():
        cf_plot = gr.Plot(label="Counterfactual Causal Score")
        timeline_plot = gr.Plot(label="Confidence Timeline")
    with gr.Row():
        radar_plot = gr.Plot(label="Agent Rigor Radar")
        conf_plot = gr.Plot(label="Confidence Levels")
    # NOTE(review): the heatmap is placed on its own line after the third
    # row; confirm it was not meant to sit inside that row as a third column.
    heatmap_plot = gr.Plot(label="Rigor Heatmap")

    gr.HTML('<div class="section-divider">Export for Research Use</div>')
    gr.HTML('<div style="font-family:DM Sans,sans-serif;font-size:.8rem;color:#4a5568;padding:.5rem 0 1rem">Export results as structured JSON or plain text for use in research papers or datasets.</div>')
    with gr.Row():
        json_output = gr.Code(label="JSON Export", language="json", lines=12, interactive=False)
        text_output = gr.Textbox(label="Plain Text Export", lines=12, interactive=False)

    # Output order must match the 11-tuple returned by run_mas:
    # HTML, JSON, text, status, then the seven charts.
    all_outputs = [results_output, json_output, text_output, status_bar,
                   influence_plot, arg_attr_plot, cf_plot, timeline_plot,
                   radar_plot, conf_plot, heatmap_plot]

    # Thin adapters: reorder each tab's inputs into
    # run_mas(topic, external_output, tab_mode).
    def _run_tab1(topic, mode, ext): return run_mas(topic, ext, mode)
    def _run_tab2(topic, claim, mode): return run_mas(topic, claim, mode)

    run_btn_1.click(fn=_run_tab1, inputs=[topic_input, tab_mode_1, ext_1], outputs=all_outputs)
    # Pressing Enter in the topic box triggers the same handler as the button.
    topic_input.submit(fn=_run_tab1, inputs=[topic_input, tab_mode_1, ext_1], outputs=all_outputs)
    run_btn_2.click(fn=_run_tab2, inputs=[topic_input_2, claim_input, tab_mode_2], outputs=all_outputs)

# share=True asks Gradio for a public share link; show_error=True surfaces
# handler errors in the browser UI.
demo.launch(share=True, show_error=True)