# ══════════════════════════════════════════════════════════════════════════
# MULTI AGENT SYSTEM — Research Verification Platform
# Built on: Mistral API (mistral-small-latest) + FAISS RAG + xAI Layer
#
# Changes from local version:
# - Local Mistral-7B replaced with Mistral API (~2-5s per agent vs ~8 mins)
# - No GPU/VRAM required — runs on CPU only
# - No bitsandbytes/accelerate/torch needed for inference
# - torch only used for embedding model (CPU)
# - Each agent call = one API request to mistral-small-latest
# ══════════════════════════════════════════════════════════════════════════
import os

# Threading limits are set before the numeric / tokenizer libraries below are
# imported (same ordering as the original file).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

from dotenv import load_dotenv

load_dotenv()  # loads MISTRAL_API_KEY from .env

import datetime
import json
import re
from typing import Optional

import numpy as np
import pandas as pd
import faiss
import gradio as gr
import plotly.graph_objects as go

try:
    from mistralai import Mistral
except ImportError:
    from mistralai.client import Mistral
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import mlflow
from monitoring import monitor_request
from scripts.data_versioning import get_rag_stores

# ══════════════════════════════════════════════════════════════════════════
# SECTION 1 — MISTRAL API CLIENT + EMBEDDING MODEL
# ══════════════════════════════════════════════════════════════════════════
print("Loading embedding model...")
embed_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
mistral_client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY", ""))
MISTRAL_MODEL = "mistral-small-latest"
print("Ready.")


def encode_single(text: str) -> np.ndarray:
    """Return the normalized embedding of *text* as a 1-D vector."""
    return embed_model.encode(
        [text], convert_to_numpy=True, normalize_embeddings=True
    )[0]


def encode_query(text: str) -> np.ndarray:
    """Return the normalized embedding of *text* as a one-row 2-D array.

    The batch dimension is kept because the result is passed straight to
    ``index.search`` in ``retrieve_for_agent``.
    """
    return embed_model.encode(
        [text], convert_to_numpy=True, normalize_embeddings=True
    )


# ══════════════════════════════════════════════════════════════════════════
# SECTION 2 — RAG STORES
# ══════════════════════════════════════════════════════════════════════════
CMV_PER_AGENT = 500
WIKI_TOTAL = 1000
WIKI_CHUNK_SZ = 150
WIKI_MIN_WORDS = 60  # chunks shorter than this many words are discarded


def build_faiss_index(embeddings: np.ndarray) -> faiss.IndexFlatIP:
    """Build an inner-product FAISS index over the rows of *embeddings*."""
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    return index


def _split_cmv(df_cmv: pd.DataFrame) -> tuple:
    """Split the CMV frame into (proposer, opposer) frames.

    Rows are split on the ``delta`` column when it exists and yields rows
    for both sides; otherwise the frame is cut positionally in half.
    """
    if "delta" in df_cmv.columns:
        df_prop = df_cmv[df_cmv["delta"].eq(True)].head(CMV_PER_AGENT)
        df_opp = df_cmv[df_cmv["delta"].eq(False)].head(CMV_PER_AGENT)
        if len(df_prop) and len(df_opp):
            return df_prop.reset_index(drop=True), df_opp.reset_index(drop=True)
    mid = len(df_cmv) // 2
    df_prop = df_cmv.iloc[:mid].head(CMV_PER_AGENT).reset_index(drop=True)
    df_opp = df_cmv.iloc[mid:].head(CMV_PER_AGENT).reset_index(drop=True)
    return df_prop, df_opp


def build_rag_stores() -> dict:
    """Download the corpora and build one retrieval store per agent.

    Returns:
        A dict keyed by agent name; each value holds ``texts`` (list of
        str), ``source`` (display label) and ``index`` (FAISS index over the
        normalized embeddings of ``texts``).
    """
    stores = {}

    print("\nLoading CMV dataset...")
    cmv = load_dataset("Siddish/change-my-view-subreddit-cleaned")
    df_cmv = pd.DataFrame(cmv["train"])
    text_col = "text" if "text" in df_cmv.columns else df_cmv.columns[0]
    # Keep "delta" alongside the text column: the original selected only the
    # text column, which made the delta-based split below unreachable.
    keep_cols = [text_col]
    if "delta" in df_cmv.columns and text_col != "delta":
        keep_cols.append("delta")
    df_cmv = df_cmv[keep_cols].rename(columns={text_col: "text"})
    df_cmv = df_cmv[df_cmv["text"].str.len() > 50].reset_index(drop=True)
    df_cmv["text"] = df_cmv["text"].str[:300]

    df_prop, df_opp = _split_cmv(df_cmv)
    print(f" Proposer: {len(df_prop)} | Opposer: {len(df_opp)}")

    prop_texts = df_prop["text"].tolist()
    opp_texts = df_opp["text"].tolist()
    all_embs = embed_model.encode(
        prop_texts + opp_texts,
        convert_to_numpy=True,
        batch_size=1,
        show_progress_bar=False,
        normalize_embeddings=True,
    )
    stores["Proposer"] = {
        "texts": prop_texts,
        "source": "CMV (pro-arguments)",
        "index": build_faiss_index(all_embs[:len(prop_texts)]),
    }
    stores["Opposer"] = {
        "texts": opp_texts,
        "source": "CMV (counter-arguments)",
        "index": build_faiss_index(all_embs[len(prop_texts):]),
    }

    print("\nStreaming Wikipedia (~1000 chunks)...")
    wiki = load_dataset(
        "wikimedia/wikipedia", "20231101.en", split="train", streaming=True
    )
    chunks = []
    for row in wiki:
        words = row["text"].split()
        for start in range(0, len(words), WIKI_CHUNK_SZ):
            piece = words[start:start + WIKI_CHUNK_SZ]
            if len(piece) >= WIKI_MIN_WORDS:
                chunks.append(" ".join(piece))
        # Stop streaming once enough chunks exist; the last article may
        # overshoot, hence the slice below.
        if len(chunks) >= WIKI_TOTAL:
            break
    chunks = chunks[:WIKI_TOTAL]

    mid_wiki = len(chunks) // 2
    wiki_embs = embed_model.encode(
        chunks,
        convert_to_numpy=True,
        batch_size=1,
        show_progress_bar=False,
        normalize_embeddings=True,
    )
    stores["Critic"] = {
        "texts": chunks[:mid_wiki],
        "source": "Wikipedia (factual)",
        "index": build_faiss_index(wiki_embs[:mid_wiki]),
    }
    stores["Synthesizer"] = {
        "texts": chunks[mid_wiki:],
        "source": "Wikipedia (balanced)",
        "index": build_faiss_index(wiki_embs[mid_wiki:]),
    }
    print("\nKnowledge stores ready.")
    return stores


print("\nBuilding knowledge stores (~1-2 mins)...")
RAG_STORES = get_rag_stores(build_rag_stores)

# Per-agent record of the most recent retrieval, filled by retrieve_for_agent.
_last_sources = {}


def retrieve_for_agent(agent_name: str, query_emb: np.ndarray, k: int = 3) -> list:
    """Return up to *k* documents from *agent_name*'s store nearest to *query_emb*.

    Also records the store label and 120-character snippets of the hits in
    ``_last_sources[agent_name]``.

    Raises:
        KeyError: if *agent_name* has no store in ``RAG_STORES``.
    """
    store = RAG_STORES[agent_name]
    texts = store["texts"]
    _, ids = store["index"].search(query_emb, k)
    # FAISS pads with -1 when fewer than k neighbours exist; without the lower
    # bound, -1 would silently select the last document.
    docs = [texts[i] for i in ids[0] if 0 <= i < len(texts)]
    _last_sources[agent_name] = {
        "source_label": store["source"],
        "snippets": [d[:120] for d in docs],
    }
    return docs


# ══════════════════════════════════════════════════════════════════════════
# SECTION 3 — AGENT DEFINITIONS
# ══════════════════════════════════════════════════════════════════════════
_FMT_RULES = """\
STRICT OUTPUT RULES:
1. Output ONLY the labeled fields below, nothing else.
2. Each field starts at the beginning of a new line.
3. CLAIM must be exactly ONE sentence.
4. Each ARGUMENT must be ONE sentence on its own line.
5. Do NOT add prose, explanations, or extra fields."""

_PROPOSER_EX = """\
CLAIM: Renewable energy is the most viable path to carbon neutrality.
ARGUMENT 1: Solar costs have fallen 90% in the last decade making it cost-competitive.
ARGUMENT 2: Wind and solar combined can meet baseload demand with sufficient storage.
ARGUMENT 3: Renewable investment creates more jobs per dollar than fossil fuel investment.
CONFIDENCE: 8
INFLUENCED_BY: none
POSITION_SHIFT: none"""

_OPPOSER_EX = """\
CLAIM: Renewable energy alone cannot reliably replace fossil fuels today.
ARGUMENT 1: Grid-scale storage remains prohibitively expensive for 100% renewable grids.
ARGUMENT 2: Manufacturing solar panels requires significant upfront carbon emissions.
ARGUMENT 3: Energy demand in developing nations is growing faster than renewable capacity.
CONFIDENCE: 7
INFLUENCED_BY: Proposer Argument 1
POSITION_SHIFT: none"""

_CRITIC_EX = """\
CLAIM: Both positions contain valid points but overstate certainty in key areas.
ARGUMENT 1: Proposer's cost claims are accurate but ignore storage and grid upgrade costs.
ARGUMENT 2: Opposer's manufacturing emissions argument ignores lifecycle carbon accounting.
ARGUMENT 3: Neither position addresses policy and governance barriers adequately.
CONFIDENCE: 8
INFLUENCED_BY: Opposer
POSITION_SHIFT: slight"""

_SYNTH_EX = """\
CLAIM: A balanced renewable transition is achievable but requires addressing storage and policy gaps.
ARGUMENT 1: Cost trends strongly support renewables as the long-term primary energy source.
ARGUMENT 2: Transition requires parallel investment in storage, grid infrastructure, and policy.
ARGUMENT 3: The Critic's lifecycle analysis provides the most accurate framing of trade-offs.
CONFIDENCE: 8
TRUST_SCORE: 74
INFLUENCED_BY: Critic
POSITION_SHIFT: moderate"""

AGENT_CONFIG = {
    "Proposer": {
        "temperature": 0.8,
        "color": "#22c55e",
        "display": "Proposer",
        "role_desc": "Defends the claim with evidence",
        "system": (
            "You are the Proposer in a research verification system. "
            "Present the strongest evidence-based case IN SUPPORT of the claim.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_PROPOSER_EX}"
        ),
    },
    "Opposer": {
        "temperature": 0.8,
        "color": "#ef4444",
        "display": "Opposer",
        "role_desc": "Challenges with counter-evidence",
        "system": (
            "You are the Opposer in a research verification system. "
            "Present the strongest evidence-based case AGAINST the claim.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_OPPOSER_EX}"
        ),
    },
    "Critic": {
        "temperature": 0.3,
        "color": "#3b82f6",
        "display": "Critic",
        "role_desc": "Audits logical consistency of both sides",
        "system": (
            "You are the Critic in a research verification system. "
            "Objectively evaluate logical consistency and evidential quality of BOTH "
            "the Proposer and Opposer. Identify overstatements and logical fallacies.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_CRITIC_EX}"
        ),
    },
    "Synthesizer": {
        "temperature": 0.4,
        "color": "#f59e0b",
        "display": "Synthesizer",
        "role_desc": "Produces a calibrated final verdict",
        "system": (
            "You are the Synthesizer in a research verification system. "
            "Produce a calibrated, balanced verdict. "
            "TRUST_SCORE (0-100) = how well-supported the original claim is. "
            "0=completely unsupported, 50=contested, 100=strongly supported.\n\n"
            f"{_FMT_RULES}\n\nExample:\n{_SYNTH_EX}"
        ),
    },
}

# Which earlier agents' outputs each agent is shown in its prompt.
CONTEXT_MAP = {
    "Proposer": [],
    "Opposer": ["Proposer"],
    "Critic": ["Proposer", "Opposer"],
    "Synthesizer": ["Proposer", "Opposer", "Critic"],
}
AGENT_ORDER = ["Proposer", "Opposer", "Critic", "Synthesizer"]
SCORE_DIMS = ["Logic", "Evidence", "Clarity", "Rigor", "Nuance"]
# agent -> ("r,g,b" string for rgba(), hex colour)
COLOR_MAP = {
    "Proposer": ("34,197,94", "#22c55e"),
    "Opposer": ("239,68,68", "#ef4444"),
    "Critic": ("59,130,246", "#3b82f6"),
    "Synthesizer": ("245,158,11", "#f59e0b"),
}


# ══════════════════════════════════════════════════════════════════════════
# SECTION 4 — MISTRAL API CALL
# ══════════════════════════════════════════════════════════════════════════
def generate_response(agent_name: str, system_prompt: str, user_prompt: str) -> str:
    """Send one chat-completion request for *agent_name* and return its text.

    The sampling temperature comes from ``AGENT_CONFIG[agent_name]``.

    Raises:
        KeyError: if *agent_name* is not in ``AGENT_CONFIG``.
        RuntimeError: on any API failure (with a dedicated message for
            authentication problems) or when the reply carries no text. The
            underlying exception is chained as ``__cause__``.
    """
    temperature = AGENT_CONFIG[agent_name]["temperature"]
    try:
        response = mistral_client.chat.complete(
            model=MISTRAL_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=temperature,
            max_tokens=400,
        )
        content = response.choices[0].message.content
    except Exception as e:
        detail = str(e)
        lowered = detail.lower()
        if "api_key" in lowered or "authentication" in lowered or "401" in detail:
            raise RuntimeError(
                "Mistral API key missing or invalid. "
                "Set os.environ['MISTRAL_API_KEY'] in Cell 1 and restart."
            ) from e
        raise RuntimeError(f"API error for {agent_name}: {detail}") from e
    if not isinstance(content, str):
        # Previously surfaced as an opaque AttributeError on ``.strip()``.
        raise RuntimeError(
            f"API error for {agent_name}: response contained no text content"
        )
    return content.strip()


# ══════════════════════════════════════════════════════════════════════════
# SECTION 5 — PROMPT BUILDER
# ══════════════════════════════════════════════════════════════════════════
def build_prompt(agent_name: str, topic: str, history: dict, mode: str,
                 query_emb: np.ndarray, external_output: Optional[str] = None) -> tuple:
    """Assemble the ``(system, user)`` prompt pair for *agent_name*.

    The user prompt contains the topic (or, in ``"verify"`` mode with an
    *external_output*, the claim to verify), the first 500 characters of each
    prior agent's output listed in ``CONTEXT_MAP``, and snippets retrieved
    from the agent's knowledge store.

    Prior agents absent from *history* are skipped, so the counterfactual run
    (which omits the Critic) does not show an empty ``--- Critic ---`` section.
    """
    system = AGENT_CONFIG[agent_name]["system"]

    if mode == "verify" and external_output:
        seed = (
            f"Research claim to verify:\n\"{external_output[:500]}\"\n\n"
            f"Original topic: {topic}"
        )
    else:
        seed = f"Research topic: {topic}"

    prior = "".join(
        f"\n\n--- {pa} ---\n{history[pa][:500]}"
        for pa in CONTEXT_MAP[agent_name]
        if pa in history
    )

    rag_docs = retrieve_for_agent(agent_name, query_emb, k=3)
    rag_text = "\n".join(f"• {doc[:120]}" for doc in rag_docs)

    user = (
        f"{seed}{prior}\n\n"
        f"Evidence from knowledge base:\n{rag_text}\n\n"
        f"Respond in the format specified. Be concise."
    )
    return system, user


# ══════════════════════════════════════════════════════════════════════════
# SECTION 6 — RESPONSE PARSER
# ══════════════════════════════════════════════════════════════════════════
_NO_CLAIM_PLACEHOLDER = "No claim extracted"
_NO_ARGS_PLACEHOLDER = "No structured arguments found"

# A field label only counts as a delimiter when followed by a colon, so that
# ordinary words such as "confidence" or "trust" inside a sentence no longer
# cut the captured value short.
_LABEL_RE = (
    r"\b(?:CLAIM|ARGUMENT\s*\d+|CONFIDENCE|TRUST[_ ]SCORE|"
    r"INFLUENCED[_ ]BY|POSITION[_ ]SHIFT)\s*:"
)
_PARSE_FLAGS = re.DOTALL | re.IGNORECASE


def _field_pattern(label: str, single_line: bool = False):
    """Compile a regex capturing the value after ``label:`` up to the next field."""
    stop = rf"\s*{_LABEL_RE}|\Z"
    if single_line:
        stop = rf"\n|{stop}"
    return re.compile(rf"\b{label}\s*:[ \t]*(.*?)(?={stop})", _PARSE_FLAGS)


_CLAIM_PAT = _field_pattern("CLAIM")
_ARG_PATS = [_field_pattern(rf"ARGUMENT\s*{i}") for i in range(1, 4)]
_INFLUENCED_PAT = _field_pattern(r"INFLUENCED[_ ]BY", single_line=True)
_SHIFT_PAT = _field_pattern(r"POSITION[_ ]SHIFT", single_line=True)
_CONFIDENCE_PAT = re.compile(r"\bCONFIDENCE\s*:\s*(\d+)", re.IGNORECASE)
_TRUST_PAT = re.compile(r"\bTRUST[_ ]SCORE\s*:\s*(\d+)", re.IGNORECASE)


def parse_response(text: str) -> dict:
    """Parse an agent reply in the labeled-field format into a dict.

    Returns:
        Dict with keys ``claim`` (str), ``arguments`` (non-empty list of
        str), ``confidence`` (int clamped to 1-10, default 5),
        ``influenced_by`` / ``position_shift`` (str, default ``"unknown"``)
        and ``trust_score`` (int clamped to 0-100, or ``None``).

        If neither a claim nor any argument is found, the first non-blank
        line becomes the claim and the next three lines the arguments.
    """

    def _get(pattern, default=""):
        m = pattern.search(text)
        value = m.group(1).strip() if m else ""
        return value or default

    result = {
        "claim": _get(_CLAIM_PAT),
        "arguments": [arg for arg in (_get(p) for p in _ARG_PATS) if arg],
        "confidence": 5,
        "influenced_by": _get(_INFLUENCED_PAT, "unknown"),
        "position_shift": _get(_SHIFT_PAT, "unknown"),
        "trust_score": None,
    }

    conf = _CONFIDENCE_PAT.search(text)
    if conf:
        result["confidence"] = min(10, max(1, int(conf.group(1))))
    trust = _TRUST_PAT.search(text)
    if trust:
        result["trust_score"] = min(100, max(0, int(trust.group(1))))

    if not result["claim"] and not result["arguments"]:
        # Unstructured reply: fall back to treating raw lines as fields.
        lines = [ln.strip() for ln in text.split("\n") if ln.strip()]
        result["claim"] = lines[0] if lines else _NO_CLAIM_PLACEHOLDER
        result["arguments"] = lines[1:4]
    if not result["arguments"]:
        result["arguments"] = [_NO_ARGS_PLACEHOLDER]
    return result


# ══════════════════════════════════════════════════════════════════════════
# SECTION 7 — AGENT SCORER
# ══════════════════════════════════════════════════════════════════════════
def score_agent(raw_text: str, parsed: dict) -> dict:
    """Score a reply on the five ``SCORE_DIMS`` using keyword heuristics.

    Each score is capped at 10. Placeholder values inserted by
    ``parse_response`` are not counted as real claims or arguments, so an
    unparseable reply is no longer credited for them.
    """
    t = raw_text.lower()
    n_args = sum(1 for a in parsed["arguments"] if a != _NO_ARGS_PLACEHOLDER)
    has_claim = bool(parsed["claim"]) and parsed["claim"] != _NO_CLAIM_PLACEHOLDER
    return {
        "Logic": min(10, 4 + n_args * 2),
        "Evidence": min(10, 3 + t.count("because") * 2 + t.count("evidence") * 2
                        + t.count("research") + t.count("study")),
        "Clarity": min(10, 5 + (3 if has_claim else 0) + (2 if n_args >= 2 else 0)),
        "Rigor": min(10, 3 + t.count("however") * 2 + t.count("although") * 2
                     + t.count("data") + t.count("source")),
        "Nuance": min(10, 3 + t.count("while") * 2 + t.count("despite")
                      + t.count("context") + t.count("limitation")),
    }


# ══════════════════════════════════════════════════════════════════════════
# SECTION 8 — PIPELINE
# ══════════════════════════════════════════════════════════════════════════
def run_pipeline(topic: str, external_output: Optional[str] = None, mode: str = "research"):
    """Run the four agents in ``AGENT_ORDER`` and collect their outputs.

    Returns:
        ``(history, parsed_all, raw_embs, scores, query_emb)`` where the
        first four are dicts keyed by agent name (raw reply, parsed fields,
        reply embedding, heuristic scores) and ``query_emb`` is the topic
        embedding reused for retrieval.
    """
    history = {}
    parsed_all = {}
    raw_embs = {}
    scores = {}

    print(" Computing topic embedding...")
    query_emb = encode_query(topic)

    for agent_name in AGENT_ORDER:
        cfg = AGENT_CONFIG[agent_name]
        print(f" Calling {cfg['display']} via API...")
        system, user = build_prompt(
            agent_name, topic, history, mode, query_emb, external_output
        )
        raw = generate_response(agent_name, system, user)
        parsed = parse_response(raw)
        history[agent_name] = raw
        parsed_all[agent_name] = parsed
        raw_embs[agent_name] = encode_single(raw)
        scores[agent_name] = score_agent(raw, parsed)
        print(f" Done: {(parsed['claim'] or '')[:80]}")

    # Imported at call time, as in the original.
    from scripts.model_versioning import log_agent_run
    log_agent_run(topic, mode, parsed_all, scores)
    return history, parsed_all, raw_embs, scores, query_emb


# ══════════════════════════════════════════════════════════════════════════
# SECTION 9 — xAI LAYER
# ══════════════════════════════════════════════════════════════════════════
_INFLUENCE_PAIRS = [
    ("Proposer", "Opposer"), ("Proposer", "Critic"), ("Proposer", "Synthesizer"),
    ("Opposer", "Critic"), ("Opposer", "Synthesizer"), ("Critic", "Synthesizer"),
]
_CF_KEY = "Critic → Synthesizer (causal)"


def _cosine(a, b) -> Optional[float]:
    """Dot product of two normalized embeddings rounded to 3 dp, or ``None``."""
    if a is None or b is None:
        return None
    try:
        return round(float(np.dot(a, b)), 3)
    except (TypeError, ValueError):
        return None


def compute_xai(history, parsed, embeddings, topic, mode, query_emb, external_output=None):
    """Compute the explainability metrics for one pipeline run.

    Returns:
        Dict with ``influence_matrix`` (pairwise reply similarity, 0.0 when
        an embedding is missing), ``argument_attribution``,
        ``counterfactual_scores`` (1 - similarity between the Synthesizer's
        reply and a re-run without the Critic; 0.0 on failure),
        ``position_shifts`` and ``position_timeline``.

    Note:
        Makes one additional Synthesizer API call for the counterfactual,
        unless no Synthesizer embedding is available to compare against.
    """
    xai = {"influence_matrix": {}, "argument_attribution": {},
           "counterfactual_scores": {}, "position_shifts": {},
           "position_timeline": {}}

    for src, tgt in _INFLUENCE_PAIRS:
        sim = _cosine(embeddings.get(src), embeddings.get(tgt))
        xai["influence_matrix"][f"{src} → {tgt}"] = 0.0 if sim is None else sim

    attribution = xai["argument_attribution"]
    opp_emb = embeddings.get("Opposer")
    if opp_emb is not None:
        for i, arg in enumerate(parsed.get("Proposer", {}).get("arguments", [])):
            if not arg or arg == _NO_ARGS_PLACEHOLDER:
                continue
            try:
                sim = _cosine(encode_single(arg), opp_emb)
            except Exception as e:  # best-effort: one bad argument must not abort the run
                print(f" Argument attribution failed for Prop Arg {i+1}: {e}")
                continue
            if sim is not None:
                attribution[f"Prop Arg {i+1}: {arg[:40]}..."] = sim

    ce = embeddings.get("Critic")
    se = embeddings.get("Synthesizer")
    for src in ["Proposer", "Opposer"]:
        sim = _cosine(embeddings.get(src), ce)
        if sim is not None:
            attribution[f"{src} → Critic"] = sim
    for src in ["Proposer", "Opposer", "Critic"]:
        sim = _cosine(embeddings.get(src), se)
        if sim is not None:
            attribution[f"{src} → Synth"] = sim

    # Counterfactual: re-run Synthesizer without Critic
    if se is None:
        # Nothing to compare against, so skip the extra API call.
        xai["counterfactual_scores"][_CF_KEY] = 0.0
    else:
        try:
            print(" Counterfactual: Synthesizer without Critic...")
            cf_hist = {k: history[k] for k in ["Proposer", "Opposer"] if k in history}
            sys_cf, user_cf = build_prompt(
                "Synthesizer", topic, cf_hist, mode, query_emb, external_output
            )
            cf_raw = generate_response("Synthesizer", sys_cf, user_cf)
            cf_emb = encode_single(cf_raw)
            causal = 1.0 - float(np.dot(cf_emb, se))
            xai["counterfactual_scores"][_CF_KEY] = round(causal, 3)
        except Exception as e:
            print(f" Counterfactual failed: {e}")
            xai["counterfactual_scores"][_CF_KEY] = 0.0

    for a in ["Opposer", "Critic", "Synthesizer"]:
        if a in parsed:
            xai["position_shifts"][a] = {
                "influenced_by": parsed[a].get("influenced_by", "unknown"),
                "position_shift": parsed[a].get("position_shift", "unknown"),
                "confidence": parsed[a].get("confidence", 5),
            }
    xai["position_timeline"] = {
        a: parsed[a].get("confidence", 5) for a in AGENT_ORDER if a in parsed
    }
    return xai


# ══════════════════════════════════════════════════════════════════════════
# SECTION 10 — EXPORT HELPERS
# ══════════════════════════════════════════════════════════════════════════
def _utc_timestamp() -> str:
    """Return the current UTC time as ISO-8601 with a trailing ``Z``.

    Uses an aware clock instead of the deprecated ``datetime.utcnow()``;
    the tzinfo is dropped before formatting so the text keeps the original
    ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` shape rather than ending in ``+00:00``.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.replace(tzinfo=None).isoformat() + "Z"


def build_export_json(topic, mode, parsed, xai, sources) -> str:
    """Serialize one analysis run as an indented JSON report.

    Args:
        topic: Research topic string.
        mode: Pipeline mode label, stored as-is.
        parsed: Dict of agent name -> parsed reply dict; agents are emitted
            in ``AGENT_ORDER`` and missing agents are skipped.
        xai: Dict from which ``influence_matrix``, ``argument_attribution``
            and ``counterfactual_scores`` are exported (missing keys become
            empty dicts).
        sources: Evidence-source mapping, embedded unchanged.

    Returns:
        JSON text with a single top-level key ``multi_agent_system_report``.
    """
    agents = {}
    for name in AGENT_ORDER:
        if name not in parsed:
            continue
        entry = parsed[name]
        agents[name] = {
            "claim": entry.get("claim", ""),
            "arguments": entry.get("arguments", []),
            "confidence": entry.get("confidence", 5),
            "influenced_by": entry.get("influenced_by", ""),
            "position_shift": entry.get("position_shift", ""),
            "trust_score": entry.get("trust_score"),
        }

    export = {
        "multi_agent_system_report": {
            "timestamp": _utc_timestamp(),
            "model": MISTRAL_MODEL,
            "topic": topic,
            "mode": mode,
            "agents": agents,
            "xai": {
                "influence_matrix": xai.get("influence_matrix", {}),
                "argument_attribution": xai.get("argument_attribution", {}),
                "counterfactual_scores": xai.get("counterfactual_scores", {}),
            },
            "evidence_sources": sources,
        }
    }
    return json.dumps(export, indent=2)


def build_export_text(topic, mode, parsed, xai) -> str:
    """Render one analysis run as a plain-text report.

    Takes the same *topic*, *mode*, *parsed* and *xai* inputs as
    ``build_export_json``. Returns newline-joined text: a header, one section
    per agent present in *parsed* (in ``AGENT_ORDER``), then the influence
    matrix and counterfactual scores formatted to three decimals.
    """
    rule = "=" * 60
    lines = [
        rule,
        "MULTI AGENT SYSTEM — RESEARCH VERIFICATION REPORT",
        f"Topic: {topic}",
        f"Mode: {mode}",
        f"Model: {MISTRAL_MODEL}",
        f"Timestamp: {_utc_timestamp()}",
        rule,
        "",
    ]

    for name in AGENT_ORDER:
        if name not in parsed:
            continue
        p = parsed[name]
        lines.append(f"[ {name.upper()} ]")
        lines.append(f"Claim: {p.get('claim','')}")
        lines.extend(
            f" Argument {i}: {arg}"
            for i, arg in enumerate(p.get("arguments", []), 1)
        )
        lines.append(f" Confidence: {p.get('confidence',5)}/10")
        lines.append(f" Influenced by: {p.get('influenced_by','')}")
        lines.append(f" Position shift: {p.get('position_shift','')}")
        if p.get("trust_score") is not None:
            lines.append(f" TRUST SCORE: {p['trust_score']}/100")
        lines.append("")

    lines.append("[ xAI — INFLUENCE ATTRIBUTION ]")
    lines.extend(f" {k}: {v:.3f}" for k, v in xai.get("influence_matrix", {}).items())
    lines += ["", "[ xAI — COUNTERFACTUAL ]"]
    lines.extend(f" {k}: {v:.3f}" for k, v in xai.get("counterfactual_scores", {}).items())
    return "\n".join(lines)


# ══════════════════════════════════════════════════════════════════════════
# SECTION 11 — CHART
# BUILDERS
# ══════════════════════════════════════════════════════════════════════════
BG = "#0a0f1e"
TC = "#c9d1e0"
# Layout keyword arguments shared by every figure (dark theme).
LB = dict(paper_bgcolor=BG, plot_bgcolor=BG,
          font=dict(color=TC, family="DM Mono, monospace"),
          margin=dict(l=40, r=40, t=50, b=40))


def _ef(title=""):
    """Return an empty themed figure, used as a placeholder when there is no data."""
    fig = go.Figure()
    fig.update_layout(title=title, **LB)
    return fig


def chart_influence_matrix(im):
    """Heatmap of pairwise influence scores.

    *im* maps ``"<source> → <target>"`` to a score; keys that do not name
    two agents from ``AGENT_ORDER`` are ignored. The matrix is sized from
    ``AGENT_ORDER`` rather than a hard-coded 4x4.
    """
    if not im:
        return _ef("Influence Attribution Matrix")
    position = {name: idx for idx, name in enumerate(AGENT_ORDER)}
    mat = np.zeros((len(AGENT_ORDER), len(AGENT_ORDER)))
    for pair, score in im.items():
        parts = pair.split(" → ")
        if len(parts) == 2 and parts[0] in position and parts[1] in position:
            mat[position[parts[0]]][position[parts[1]]] = score
    rounded = np.round(mat, 2)
    fig = go.Figure(go.Heatmap(
        z=rounded, x=AGENT_ORDER, y=AGENT_ORDER,
        colorscale=[[0, "#0a0f1e"], [0.5, "#1e3a5f"], [1, "#22c55e"]],
        zmin=0, zmax=1, text=rounded, texttemplate="%{text}", showscale=True))
    fig.update_layout(title="Influence Attribution Matrix", **LB)
    return fig


def _attribution_color(label):
    """Pick the bar colour for an attribution label (first matching rule wins)."""
    if "Prop Arg" in label:
        return "#22c55e"
    if "Critic" in label:
        return "#3b82f6"
    if "Synth" in label:
        return "#f59e0b"
    return "#ef4444"


def chart_argument_attribution(arg_scores):
    """Bar chart of argument-attribution scores keyed by label."""
    if not arg_scores:
        return _ef("Argument Attribution")
    labels = list(arg_scores.keys())
    values = list(arg_scores.values())
    fig = go.Figure(go.Bar(
        x=labels, y=values,
        marker_color=[_attribution_color(lbl) for lbl in labels],
        text=[f"{v:.2f}" for v in values], textposition="outside"))
    fig.update_layout(title="Argument Attribution Scores",
                      yaxis=dict(range=[0, 1.15], title="Cosine Similarity"),
                      xaxis=dict(tickangle=-30), **LB)
    return fig


def chart_counterfactual(cf_scores):
    """Bar chart of counterfactual causal scores keyed by label."""
    if not cf_scores:
        return _ef("Counterfactual Causal Influence")
    labels = list(cf_scores.keys())
    values = list(cf_scores.values())
    # A scalar colour applies to every bar; the original one-element list
    # only styled the first.
    fig = go.Figure(go.Bar(
        x=labels, y=values, marker_color="#3b82f6",
        text=[f"{v:.3f}" for v in values], textposition="outside"))
    fig.update_layout(title="Counterfactual: Critic's Causal Impact",
                      yaxis=dict(range=[0, max(max(values) * 1.4, 0.1)],
                                 title="Causal Influence Score"), **LB)
    return fig


def chart_timeline(timeline):
    """Line-and-marker chart of each agent's confidence in pipeline order."""
    if not timeline:
        return _ef("Confidence Timeline")
    fig = go.Figure()
    # Dotted connector first, then one coloured marker per known agent.
    fig.add_trace(go.Scatter(
        x=list(timeline.keys()), y=list(timeline.values()), mode="lines",
        line=dict(color="#1e3a5f", dash="dot"), showlegend=False))
    for agent, conf in timeline.items():
        if agent not in COLOR_MAP:
            continue
        _, hx = COLOR_MAP[agent]
        fig.add_trace(go.Scatter(
            x=[agent], y=[conf], mode="markers+text",
            marker=dict(size=14, color=hx),
            text=[str(conf)], textposition="top center", name=agent))
    fig.update_layout(title="Agent Confidence Across Pipeline",
                      yaxis=dict(range=[0, 12], title="Confidence /10"), **LB)
    return fig


def chart_radar(all_scores):
    """Radar chart of each agent's scores over ``SCORE_DIMS``.

    Values are looked up per dimension (missing ones count as 0), matching
    ``chart_heatmap``, so the trace stays aligned with the axis labels
    whatever the key order of the score dict, and an empty score dict no
    longer raises ``IndexError``.
    """
    if not all_scores:
        return _ef("Agent Rigor Radar")
    cats = SCORE_DIMS + [SCORE_DIMS[0]]  # repeat first axis to close the polygon
    fig = go.Figure()
    for name, scores in all_scores.items():
        if name not in COLOR_MAP:
            continue
        rgb, hx = COLOR_MAP[name]
        vals = [scores.get(dim, 0) for dim in SCORE_DIMS]
        vals.append(vals[0])
        fig.add_trace(go.Scatterpolar(
            r=vals, theta=cats, fill="toself", name=name,
            line=dict(color=hx, width=2), fillcolor=f"rgba({rgb},0.12)"))
    fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 10])),
                      title="Agent Rigor Radar", legend=dict(bgcolor=BG), **LB)
    return fig


def chart_confidence(parsed):
    """Bar chart of each agent's self-reported confidence (default 5)."""
    agents = [a for a in AGENT_ORDER if a in parsed]
    if not agents:
        return _ef("Agent Confidence")
    confidences = [parsed[a].get("confidence", 5) for a in agents]
    fig = go.Figure(go.Bar(
        x=agents, y=confidences,
        marker_color=[COLOR_MAP[a][1] for a in agents],
        text=confidences, textposition="outside"))
    fig.update_layout(title="Agent Self-Reported Confidence",
                      yaxis=dict(range=[0, 12], title="Confidence /10"), **LB)
    return fig


def chart_heatmap(all_scores):
    """Heatmap of agents (rows) against ``SCORE_DIMS`` (columns)."""
    agents = [a for a in AGENT_ORDER if a in all_scores]
    if not agents:
        return _ef("Rigor Heatmap")
    mat = [[all_scores[a].get(d, 0) for d in SCORE_DIMS] for a in agents]
    fig = go.Figure(go.Heatmap(
        z=mat, x=SCORE_DIMS, y=agents, colorscale="YlGnBu",
        zmin=0, zmax=10, text=mat, texttemplate="%{text}", showscale=True))
    fig.update_layout(title="Agent Rigor Heatmap", **LB)
    return fig


#
══════════════════════════════════════════════════════════════════════════ # SECTION 12 — HTML BUILDERS # ══════════════════════════════════════════════════════════════════════════ PRODUCT_CSS = """ @import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500&display=swap'); :root{--bg:#0a0f1e;--surface:#0f1729;--border:#1a2744;--text:#c9d1e0;--muted:#4a5568; --green:#22c55e;--red:#ef4444;--blue:#3b82f6;--amber:#f59e0b;--purple:#a855f7} body,.gradio-container{background:var(--bg)!important} .mas-header{padding:3rem 2rem 2rem;border-bottom:1px solid var(--border);position:relative;overflow:hidden} .mas-header::before{content:'';position:absolute;inset:0; background:radial-gradient(ellipse 80% 60% at 50% -20%,rgba(34,197,94,.06),transparent);pointer-events:none} .mas-wordmark{font-family:'DM Serif Display',serif;font-size:3rem;letter-spacing:-.02em; color:#e8edf5;line-height:1;margin-bottom:.4rem} .mas-wordmark em{font-style:italic;color:var(--green)} .mas-tagline{font-family:'DM Mono',monospace;font-size:.72rem;letter-spacing:.2em; color:var(--muted);text-transform:uppercase} .mas-desc{font-family:'DM Sans',sans-serif;font-size:.95rem;color:#7a8ba0; margin-top:1rem;max-width:600px;line-height:1.6} .agent-grid{display:grid;grid-template-columns:1fr 1fr;gap:1rem;margin:1.5rem 0} @media(max-width:768px){.agent-grid{grid-template-columns:1fr}} .agent-card{background:var(--surface);border:1px solid var(--border);border-radius:12px; padding:1.4rem;position:relative;overflow:hidden} .agent-card::before{content:'';position:absolute;top:0;left:0;right:0;height:2px} .agent-card.proposer::before{background:var(--green)} .agent-card.opposer::before{background:var(--red)} .agent-card.critic::before{background:var(--blue)} .agent-card.synthesizer::before{background:var(--amber)} .agent-label{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.18em; 
text-transform:uppercase;margin-bottom:.6rem;display:flex;align-items:center;gap:.5rem} .agent-dot{width:7px;height:7px;border-radius:50%;display:inline-block} .agent-claim{font-family:'DM Serif Display',serif;font-size:1.05rem;color:#d8e0ed; line-height:1.45;margin-bottom:1rem;font-style:italic} .agent-args{list-style:none;padding:0;margin:0 0 1rem} .agent-args li{font-family:'DM Sans',sans-serif;font-size:.875rem;color:var(--text); padding:.45rem 0 .45rem 1rem;border-bottom:1px solid rgba(255,255,255,.04); line-height:1.5;position:relative} .agent-args li::before{content:'—';position:absolute;left:0;color:var(--muted)} .agent-meta{display:flex;gap:.6rem;flex-wrap:wrap;margin-top:.8rem} .meta-chip{font-family:'DM Mono',monospace;font-size:.65rem;padding:2px 10px;border-radius:100px; background:rgba(255,255,255,.04);color:var(--muted);border:1px solid var(--border);letter-spacing:.05em} .trust-banner{background:var(--surface);border:1px solid var(--border);border-radius:16px; padding:2.5rem 2rem;text-align:center;margin:1.5rem 0} .trust-score-num{font-family:'DM Serif Display',serif;font-size:5rem;line-height:1;font-weight:400} .trust-label{font-family:'DM Mono',monospace;font-size:.7rem;letter-spacing:.2em; text-transform:uppercase;color:var(--muted);margin-top:.3rem} .trust-verdict{font-family:'DM Serif Display',serif;font-size:1.1rem;color:#c9d1e0; margin-top:1.2rem;font-style:italic;max-width:600px;margin-left:auto;margin-right:auto;line-height:1.5} .trust-meter{width:240px;height:8px;background:var(--border);border-radius:100px;margin:1rem auto 0;overflow:hidden} .trust-fill{height:100%;border-radius:100px} .xai-panel{background:var(--surface);border:1px solid var(--border);border-left:3px solid var(--purple); border-radius:12px;padding:1.4rem;margin:1rem 0} .xai-title{font-family:'DM Mono',monospace;font-size:.7rem;letter-spacing:.18em; text-transform:uppercase;color:var(--purple);margin-bottom:1rem} 
.xai-row{display:flex;justify-content:space-between;align-items:center; padding:.4rem 0;border-bottom:1px solid rgba(255,255,255,.03)} .xai-key{font-family:'DM Mono',monospace;font-size:.75rem;color:var(--text)} .xai-val{font-family:'DM Mono',monospace;font-size:.8rem;color:var(--green);font-weight:500} .sources-panel{background:rgba(10,15,30,.8);border:1px solid var(--border);border-radius:12px;padding:1.4rem;margin:1rem 0} .sources-title{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.18em; text-transform:uppercase;color:var(--muted);margin-bottom:1rem} .source-item{margin-bottom:1rem} .source-agent{font-family:'DM Mono',monospace;font-size:.7rem;color:var(--blue);margin-bottom:.3rem} .source-snippet{font-family:'DM Sans',sans-serif;font-size:.8rem;color:#5a6a7e;line-height:1.5; padding-left:.8rem;border-left:2px solid var(--border)} .section-divider{font-family:'DM Mono',monospace;font-size:.65rem;letter-spacing:.2em;text-transform:uppercase; color:var(--muted);text-align:center;padding:1.5rem 0 .5rem; display:flex;align-items:center;gap:1rem} .section-divider::before,.section-divider::after{content:'';flex:1;height:1px;background:var(--border)} """ def _tc(s): if s is None: return "#4a5568" if s>=75: return "#22c55e" if s>=50: return "#f59e0b" if s>=25: return "#ef4444" return "#7f1d1d" def _tl(s): if s is None: return "UNSCORED" if s>=75: return "WELL SUPPORTED" if s>=50: return "CONTESTED" if s>=25: return "WEAKLY SUPPORTED" return "UNSUPPORTED" def build_results_html(topic, mode, parsed, xai, sources): synth = parsed.get("Synthesizer", {}) score = synth.get("trust_score") color = _tc(score) label = _tl(score) score_disp = str(score) if score is not None else "—" fill_pct = score if score is not None else 0 verdict = synth.get("claim", "No verdict extracted.") mode_label = "CLAIM VERIFICATION" if mode=="verify" else "RESEARCH ANALYSIS" trust_html = f"""
{mode_label} · {topic[:60].upper()}
{score_disp}
/ 100
{label}
"{verdict}"
""" card_cls = {"Proposer":"proposer","Opposer":"opposer","Critic":"critic","Synthesizer":"synthesizer"} cards = '
' for a in AGENT_ORDER: if a not in parsed: continue p = parsed[a]; cfg = AGENT_CONFIG[a]; cls = card_cls[a] args_html = "".join(f"
  • {arg}
  • " for arg in p.get("arguments",[])) trust_line = "" if p.get("trust_score") is not None: trust_line = f'
    Trust Score: {p["trust_score"]}/100
    ' cards += f"""
    {a} — {cfg['role_desc']}
    "{p.get('claim','No claim extracted.')}"
    {trust_line}
    conf {p.get('confidence',5)}/10 shifted: {p.get('position_shift','—')} influenced: {p.get('influenced_by','—')}
    """ cards += "
    " inf = xai.get("influence_matrix",{}); cf = xai.get("counterfactual_scores",{}) arg_attr = xai.get("argument_attribution",{}) top_inf = max(inf, key=inf.get) if inf else "N/A" top_score = inf.get(top_inf, 0) if inf else 0 top_arg = max(arg_attr, key=arg_attr.get) if arg_attr else "N/A" inf_rows = "".join(f'
    {k}{v:.3f}
    ' for k,v in inf.items()) cf_rows = "".join(f'
    {k}{v:.3f}
    ' for k,v in cf.items()) xai_html = f"""
    xAI — Influence Attribution Layer
    Strongest influence: {top_inf} ({top_score:.3f}) · Most attributed argument: {top_arg}
    Semantic Influence Matrix
    {inf_rows}
    Counterfactual Causal Score
    {cf_rows}
    """ src_items = "" for a, data in sources.items(): snippets = "".join(f'
    "{s}..."
    ' for s in data.get("snippets",[])) src_items += f'
    {a} ← {data.get("source_label","")}
    {snippets}
    ' sources_html = f"""
    Evidence Transparency — Retrieved Sources
    {src_items}
    """ return f"{trust_html}
    Agent Analysis
    {cards}
    Explainability Layer
    {xai_html}
    Evidence Sources
    {sources_html}" # ══════════════════════════════════════════════════════════════════════════ # SECTION 13 — ORCHESTRATOR # ══════════════════════════════════════════════════════════════════════════ @monitor_request def run_mas(topic: str, external_output: str, tab_mode: str): empties = tuple(_ef() for _ in range(7)) if not topic.strip(): return ("

    ⚠ Enter a research topic.

    ", "", "", "") + empties if tab_mode == "Verify Claim / AI Output" and not external_output.strip(): return ("

    ⚠ Paste the claim or AI output to verify.

    ", "", "", "") + empties mode = "verify" if tab_mode == "Verify Claim / AI Output" else "research" ext = external_output.strip() if mode == "verify" else None print(f"\n{'='*60}\nMulti Agent System | Mode: {mode} | Topic: {topic}\n{'='*60}") try: _last_sources.clear() history, parsed, embeddings, scores, query_emb = run_pipeline(topic, ext, mode) xai = compute_xai(history, parsed, embeddings, topic, mode, query_emb, ext) results_html = build_results_html(topic, mode, parsed, xai, dict(_last_sources)) json_out = build_export_json(topic, mode, parsed, xai, dict(_last_sources)) text_out = build_export_text(topic, mode, parsed, xai) return ( results_html, json_out, text_out, f"✓ Analysis complete — {datetime.datetime.utcnow().strftime('%H:%M:%S UTC')}", chart_influence_matrix(xai["influence_matrix"]), chart_argument_attribution(xai["argument_attribution"]), chart_counterfactual(xai["counterfactual_scores"]), chart_timeline(xai["position_timeline"]), chart_radar(scores), chart_confidence(parsed), chart_heatmap(scores), ) except Exception as exc: import traceback; traceback.print_exc() return (f"

    Error: {exc}

    ", "", "", "Error") + empties # ══════════════════════════════════════════════════════════════════════════ # SECTION 14 — GRADIO UI # ══════════════════════════════════════════════════════════════════════════ SAMPLE_TOPICS = [ "The long-term effects of social media on adolescent mental health", "Whether large language models can achieve genuine reasoning", "The effectiveness of universal basic income programs", "Genetic engineering in agriculture: risks and benefits", "The causal relationship between economic inequality and crime", ] SAMPLE_CLAIMS = [ ["Social media and mental health", "Social media use is directly correlated with increased rates of depression and anxiety in teenagers, as evidenced by multiple longitudinal studies."], ["LLM reasoning capabilities", "Current large language models do not perform genuine logical reasoning — they pattern-match on training data and cannot reliably solve novel problems."], ["Universal basic income", "UBI pilot programs have consistently shown that unconditional cash transfers reduce poverty without reducing workforce participation."], ] gr.close_all() with gr.Blocks(theme=gr.themes.Base(), css=PRODUCT_CSS + """ .gradio-container{max-width:1100px!important;margin:0 auto} button.primary{background:#22c55e!important;color:#0a0f1e!important; font-family:'DM Mono',monospace!important;font-size:.8rem!important; letter-spacing:.1em!important;border-radius:8px!important} .gr-form,.gr-box{background:#0f1729!important;border-color:#1a2744!important} label{font-family:'DM Mono',monospace!important;font-size:.72rem!important; letter-spacing:.1em!important;color:#4a5568!important;text-transform:uppercase!important} textarea,input{background:#080d1a!important;border-color:#1a2744!important; color:#c9d1e0!important;font-family:'DM Sans',sans-serif!important} """) as demo: gr.HTML(f"""
    Multi Agent System
    Research Verification · Open Source · Powered by Mistral API ({MISTRAL_MODEL})
    Submit a research topic or claim. Four independent AI agents — Proposer, Opposer, Critic, and Synthesizer — analyse it using RAG-grounded evidence and produce a calibrated, bias-audited verdict with full xAI transparency.
    ◆ Proposer — evidence-based support
    ◆ Opposer — evidence-based refutation
    ◆ Critic — logical consistency audit
    ◆ Synthesizer — calibrated final verdict
    """) with gr.Tabs(): with gr.TabItem("Research Topic"): topic_input = gr.Textbox( placeholder="e.g. The long-term effects of social media on adolescent mental health", label="Research Topic or Question", lines=2 ) gr.Examples(examples=SAMPLE_TOPICS, inputs=topic_input, label="Example Topics") tab_mode_1 = gr.State("Research Topic") ext_1 = gr.State("") run_btn_1 = gr.Button("Run Analysis", variant="primary", size="lg") with gr.TabItem("Verify Claim / AI Output"): gr.HTML('
    Paste any claim, AI-generated text, or research statement. The system will audit its accuracy and bias.
    ') topic_input_2 = gr.Textbox(placeholder="e.g. Effects of social media on mental health", label="Topic / Context of the Claim", lines=1) claim_input = gr.Textbox(placeholder="Paste the claim or AI-generated text to verify...", label="Claim or AI Output to Verify", lines=4) gr.Examples(examples=SAMPLE_CLAIMS, inputs=[topic_input_2, claim_input], label="Example Claims") tab_mode_2 = gr.State("Verify Claim / AI Output") run_btn_2 = gr.Button("Verify This Claim", variant="primary", size="lg") status_bar = gr.Textbox(value="", label="", interactive=False, placeholder="Ready — enter a topic and click Run") gr.HTML('
    Results
    ') results_output = gr.HTML() gr.HTML('
    xAI Visualisation
    ') with gr.Row(): influence_plot = gr.Plot(label="Influence Matrix") arg_attr_plot = gr.Plot(label="Argument Attribution") with gr.Row(): cf_plot = gr.Plot(label="Counterfactual Causal Score") timeline_plot = gr.Plot(label="Confidence Timeline") with gr.Row(): radar_plot = gr.Plot(label="Agent Rigor Radar") conf_plot = gr.Plot(label="Confidence Levels") heatmap_plot = gr.Plot(label="Rigor Heatmap") gr.HTML('
    Export for Research Use
    ') gr.HTML('
    Export results as structured JSON or plain text for use in research papers or datasets.
    ') with gr.Row(): json_output = gr.Code(label="JSON Export", language="json", lines=12, interactive=False) text_output = gr.Textbox(label="Plain Text Export", lines=12, interactive=False) all_outputs = [results_output, json_output, text_output, status_bar, influence_plot, arg_attr_plot, cf_plot, timeline_plot, radar_plot, conf_plot, heatmap_plot] def _run_tab1(topic, mode, ext): return run_mas(topic, ext, mode) def _run_tab2(topic, claim, mode): return run_mas(topic, claim, mode) run_btn_1.click(fn=_run_tab1, inputs=[topic_input, tab_mode_1, ext_1], outputs=all_outputs) topic_input.submit(fn=_run_tab1, inputs=[topic_input, tab_mode_1, ext_1], outputs=all_outputs) run_btn_2.click(fn=_run_tab2, inputs=[topic_input_2, claim_input, tab_mode_2], outputs=all_outputs) demo.launch(share=True, show_error=True)