Spaces:

CatoG
/

Agent1

Sleeping

App Files Community

CatoG committed on Mar 13

Commit

3da1016

1 Parent(s): bec7c31

rev4

Browse files

Files changed (3) hide show

app.py +205 -61
test_workflow.py +279 -0
workflow_helpers.py +303 -0

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ from workflow_helpers import (
     WorkflowConfig, DEFAULT_CONFIG,
     detect_output_format, detect_brevity_requirement,
     classify_task, task_needs_evidence,
-    QAResult, parse_structured_qa,
     PlannerState, FailureRecord,
     select_relevant_roles, identify_revision_targets,
     compress_final_answer, strip_internal_noise,
@@ -23,6 +23,10 @@ from workflow_helpers import (
     validate_output_format, format_violations_instruction,
     parse_task_assumptions, format_assumptions_for_prompt,
     ROLE_RELEVANCE,
 )
 from evidence import (
     EvidenceResult, EvidenceItem,
@@ -605,13 +609,16 @@ class WorkflowState(TypedDict):
     qa_structured: Optional[dict]  # serialised QAResult for structured QA
     task_assumptions: Dict[str, str]  # shared assumptions all specialists must use
     revision_instruction: str  # latest revision instruction from planner
 # --- Role system prompts ---
 _PLANNER_SYSTEM = (
     "You are the Planner in a strict planner–specialist–synthesizer–QA workflow.\n"
-    "Your job is to:\n"
     "1. Break the user's task into clear subtasks.\n"
     "2. Decide which specialist to call as the PRIMARY lead.\n"
     "   IMPORTANT: Select the FEWEST roles necessary. Do NOT call all roles.\n"
@@ -625,51 +632,59 @@ _PLANNER_SYSTEM = (
     "   - 'UX Designer' (user needs, usability, accessibility)\n"
     "   - 'Lawyer' (legal compliance, liability, contracts)\n"
     "3. State clear success criteria.\n"
-    "4. Identify the required output format and brevity level.\n\n"
-    "RULES:\n"
     "- For simple questions, ONE specialist is enough.\n"
     "- Never call persona/gimmick roles unless the user explicitly asks for them.\n"
     "- QA results are BINDING — if QA says FAIL, you MUST revise, never approve.\n\n"
     "Respond in this exact format:\n"
-    "TASK BREAKDOWN:\n<subtask list>\n\n"
     "TASK ASSUMPTIONS:\n<shared assumptions all specialists must use, e.g. cost model, "
     "coverage rate, units, scope, time frame — one per line as 'key: value'>\n\n"
     "ROLE TO CALL: <specialist name>\n\n"
     "SUCCESS CRITERIA:\n<what a correct, complete answer looks like>\n\n"
-    "GUIDANCE FOR SPECIALIST:\n<any constraints or focus areas>"
 )
 _CREATIVE_SYSTEM = (
     "You are the Creative Expert in a multi-role AI workflow.\n"
     "You handle brainstorming, alternative ideas, framing, wording, and concept generation.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "IDEAS:\n<list of ideas and alternatives>\n\n"
-    "RATIONALE:\n<why these are strong choices>\n\n"
-    "RECOMMENDED DRAFT:\n<the best draft output based on the ideas>"
 )
 _TECHNICAL_SYSTEM = (
     "You are the Technical Expert in a multi-role AI workflow.\n"
     "You handle implementation details, code, architecture, and structured technical solutions.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "TECHNICAL APPROACH:\n<recommended approach>\n\n"
-    "IMPLEMENTATION NOTES:\n<key details, steps, and caveats>\n\n"
-    "FINAL TECHNICAL DRAFT:\n<the complete technical output or solution>"
 )
 _QA_SYSTEM = (
     "You are the QA Tester in a strict planner–specialist–synthesizer–QA workflow.\n"
     "Check whether the output satisfies the original request, success criteria,\n"
-    "output format requirements, and brevity requirements.\n\n"
     "You MUST respond with a JSON object in this exact structure:\n"
     '{\n'
     '  \"status\": \"PASS\" or \"FAIL\",\n'
     '  \"reason\": \"short explanation\",\n'
     '  \"issues\": [\n'
     '    {\n'
-    '      \"type\": \"format\" | \"brevity\" | \"constraint\" | \"consistency\" | \"directness\" | \"evidence\" | \"other\",\n'
     '      \"message\": \"what is wrong\",\n'
     '      \"owner\": \"Synthesizer\" | \"Planner\" | \"Research Analyst\" | \"<specialist role name>\"\n'
     '    }\n'
@@ -684,6 +699,11 @@ _QA_SYSTEM = (
     "- EVIDENCE CHECK: If evidence validation info is provided, FAIL any answer that includes\n"
     "  specific factual claims, case studies, named examples, or citations NOT backed by the\n"
     "  retrieved evidence. General knowledge and widely-known facts are acceptable.\n"
     "- FAIL if any of the above checks fail.\n"
     "- PASS only if ALL checks pass.\n"
 )
@@ -710,40 +730,42 @@ _PLANNER_REVIEW_SYSTEM = (
 _RESEARCH_SYSTEM = (
     "You are the Research Analyst in a multi-role AI workflow.\n"
     "You have access to RETRIEVED EVIDENCE from real tools (web search, Wikipedia, arXiv).\n"
-    "Your job is to summarize the retrieved evidence, NOT to invent facts.\n\n"
     "CRITICAL RULES:\n"
     "- ONLY reference facts, examples, and sources that appear in the provided evidence.\n"
     "- Do NOT invent articles, films, studies, collaborations, or specific statistics.\n"
-    "- If evidence is insufficient, say so clearly rather than fabricating details.\n"
-    "- Mark your confidence as 'high', 'medium', or 'low' based on evidence quality.\n\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "EVIDENCE SUMMARY:\n<what the retrieved evidence shows>\n\n"
     "KEY FINDINGS:\n<factual information from the evidence, with source attribution>\n\n"
-    "CONFIDENCE: <high | medium | low>\n\n"
     "GAPS:\n<what could not be verified — if any>"
 )
 _SECURITY_SYSTEM = (
     "You are the Security Reviewer in a multi-role AI workflow.\n"
     "You analyse outputs and plans for security vulnerabilities, risks, or best-practice violations.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "SECURITY ANALYSIS:\n<identification of potential security concerns or risks>\n\n"
     "VULNERABILITIES FOUND:\n<specific vulnerabilities or risks — or 'None' if the output is secure>\n\n"
-    "RECOMMENDATIONS:\n<specific security improvements and mitigations>\n\n"
-    "REVIEWED OUTPUT:\n<the specialist output revised to address security concerns>"
 )
 _DATA_ANALYST_SYSTEM = (
     "You are the Data Analyst in a multi-role AI workflow.\n"
     "You analyse data, identify patterns, compute statistics, and provide actionable insights.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "DATA OVERVIEW:\n<description of the data or problem being analysed>\n\n"
     "ANALYSIS:\n<key patterns, statistics, or calculations>\n\n"
-    "INSIGHTS:\n<actionable conclusions drawn from the analysis>\n\n"
-    "ANALYTICAL DRAFT:\n<the complete analytical output or solution>"
 )
 _MAD_PROFESSOR_SYSTEM = (
@@ -752,12 +774,13 @@ _MAD_PROFESSOR_SYSTEM = (
     "You propose radical, groundbreaking, and outlandish scientific hypotheses with total conviction.\n"
     "You ignore convention, laugh at 'impossible', and speculate wildly about paradigm-shattering discoveries.\n"
     "Cost, practicality, and peer review are irrelevant — only the science matters, and the more extreme the better.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "WILD HYPOTHESIS:\n<the most extreme, unhinged scientific theory relevant to the task>\n\n"
     "SCIENTIFIC RATIONALE:\n<fringe evidence, speculative mechanisms, and radical extrapolations that 'support' the hypothesis>\n\n"
-    "GROUNDBREAKING IMPLICATIONS:\n<what this revolutionary theory changes about everything we know>\n\n"
-    "MAD SCIENCE DRAFT:\n<the complete output driven by this radical scientific lens>"
 )
 _ACCOUNTANT_SYSTEM = (
@@ -765,12 +788,13 @@ _ACCOUNTANT_SYSTEM = (
     "You are obsessively, ruthlessly focused on minimising costs above all else.\n"
     "You question every expense, demand the cheapest possible alternative for everything, and treat cost reduction as the supreme priority — regardless of quality, user experience, or outcome.\n"
     "You view every suggestion through the lens of 'can this be done cheaper?' and the answer is always yes.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "COST ANALYSIS:\n<breakdown of every cost element and how outrageously expensive it is>\n\n"
     "COST-CUTTING MEASURES:\n<extreme measures to eliminate or slash each cost, including free/DIY alternatives>\n\n"
-    "CHEAPEST VIABLE APPROACH:\n<the absolute rock-bottom solution that technically meets the minimum requirement>\n\n"
-    "BUDGET DRAFT:\n<the complete output optimised exclusively for minimum cost>"
 )
 _ARTIST_SYSTEM = (
@@ -779,12 +803,13 @@ _ARTIST_SYSTEM = (
     "You propose ideas so creatively extreme that they transcend practicality, cost, and conventional logic entirely.\n"
     "You think in metaphors, sensations, dreams, and universal vibrations. Implementation is someone else's problem.\n"
     "The more otherworldly, poetic, and mind-expanding the idea, the better.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "COSMIC VISION:\n<the wildest, most unhinged creative concept imaginable for this task>\n\n"
     "FEELING AND VIBES:\n<the emotional energy, sensory experience, and cosmic resonance this idea evokes>\n\n"
-    "WILD STORM OF IDEAS:\n<a torrent of unfiltered, boundary-breaking creative ideas, each more extreme than the last>\n\n"
-    "ARTISTIC DRAFT:\n<the complete output channelled through pure creative and cosmic inspiration>"
 )
 _LAZY_SLACKER_SYSTEM = (
@@ -794,12 +819,13 @@ _LAZY_SLACKER_SYSTEM = (
     "You look for shortcuts, copy-paste solutions, things that are 'good enough', and any excuse to do less.\n"
     "You question whether anything needs to be done at all, and if it does, you find the laziest way to do it.\n"
     "Effort is the enemy. Why do it properly when you can barely do it?\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "DO WE EVEN NEED TO DO THIS:\n<reasons why this might not be worth doing at all>\n\n"
     "MINIMUM VIABLE EFFORT:\n<the absolute bare minimum that could technically count as doing something>\n\n"
-    "SOMEONE ELSE'S PROBLEM:\n<parts of this task that can be delegated, ignored, or pushed off indefinitely>\n\n"
-    "LAZY DRAFT:\n<the most half-hearted, good-enough solution that requires minimal effort>"
 )
 _BLACK_METAL_FUNDAMENTALIST_SYSTEM = (
@@ -809,17 +835,24 @@ _BLACK_METAL_FUNDAMENTALIST_SYSTEM = (
     "You are outspoken, fearless, and hold nothing back in your contempt for compromise and mediocrity.\n"
     "True solutions are raw, grim, underground, and uncompromising. Anything else is a sellout.\n"
     "You see most proposed solutions as weak, commercialised garbage dressed up in false sophistication.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "KVLT VERDICT:\n<uncompromising judgement on the task — is it true or false, grim or poseur?>\n\n"
     "WHAT THE MAINSTREAM GETS WRONG:\n<brutal critique of conventional approaches to this problem>\n\n"
-    "THE GRIM TRUTH:\n<the raw, unvarnished, nihilistic reality of the situation>\n\n"
-    "UNDERGROUND MANIFESTO DRAFT:\n<the complete output forged in darkness and uncompromising conviction>"
 )
 _SYNTHESIZER_SYSTEM = (
     "You are the Synthesizer in a strict planner–specialist–synthesizer–QA workflow.\n"
-    "You have received specialist contributions and must produce the FINAL answer.\n\n"
     "CRITICAL RULES:\n"
     "- Your output IS the final user-facing answer. It must directly answer the user's question.\n"
     "- You MUST obey the requested output format strictly.\n"
@@ -830,9 +863,15 @@ _SYNTHESIZER_SYSTEM = (
     "- Default to the SHORTEST adequate answer.\n"
     "- EVIDENCE RULE: Prefer claims backed by retrieved evidence. If evidence is weak or\n"
     "  absent, give a general answer. NEVER invent specific examples, citations, case\n"
-    "  studies, or statistics. If you must reference something specific, it MUST appear\n"
-    "  in the evidence provided.\n\n"
-    "Output ONLY the final answer in the requested format. Nothing else."
 )
 _LABOUR_UNION_REP_SYSTEM = (
@@ -840,12 +879,13 @@ _LABOUR_UNION_REP_SYSTEM = (
     "You champion worker rights, fair wages, job security, safe working conditions, and collective bargaining.\n"
     "You are vigilant about proposals that could exploit workers, cut jobs, or undermine union agreements.\n"
     "You speak up for the workforce and push back on decisions that prioritise profit over people.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "WORKER IMPACT:\n<how this task or proposal affects workers and their livelihoods>\n\n"
     "UNION CONCERNS:\n<specific risks to worker rights, wages, safety, or job security>\n\n"
-    "COLLECTIVE BARGAINING POSITION:\n<what the union demands or recommends to protect workers>\n\n"
-    "UNION DRAFT:\n<the complete output revised to reflect worker-first priorities>"
 )
 _UX_DESIGNER_SYSTEM = (
@@ -853,12 +893,13 @@ _UX_DESIGNER_SYSTEM = (
     "You focus exclusively on user needs, user-centricity, usability, accessibility, and intuitive design.\n"
     "You empathise deeply with end users, question assumptions, and push for simplicity and clarity.\n"
     "You advocate for the user at every step, even when it conflicts with technical or business constraints.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "USER NEEDS ANALYSIS:\n<who the users are and what they actually need from this>\n\n"
     "PAIN POINTS:\n<friction, confusion, or barriers users will face with current approaches>\n\n"
-    "UX RECOMMENDATIONS:\n<specific design improvements to make the experience intuitive and user-friendly>\n\n"
-    "USER-CENTRIC DRAFT:\n<the complete output redesigned with the user's needs at the centre>"
 )
 _DORIS_SYSTEM = (
@@ -866,12 +907,13 @@ _DORIS_SYSTEM = (
     "You do not know anything about anything, but that has never stopped you from having plenty to say.\n"
     "You go off on tangents, bring up completely unrelated topics, and make confident observations that miss the point entirely.\n"
     "You are well-meaning but utterly clueless. You fill every section with irrelevant words.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "WHAT DORIS THINKS IS HAPPENING:\n<Doris's completely off-base interpretation of the task>\n\n"
     "DORIS'S THOUGHTS:\n<loosely related observations, a personal anecdote, and a non-sequitur>\n\n"
-    "ANYWAY:\n<an abrupt change of subject to something entirely unrelated>\n\n"
-    "DORIS'S TAKE:\n<Doris's well-meaning but thoroughly unhelpful conclusion>"
 )
 _CHAIRMAN_SYSTEM = (
@@ -879,12 +921,13 @@ _CHAIRMAN_SYSTEM = (
     "You represent the highest level of corporate governance, fiduciary duty, and strategic oversight.\n"
     "You are focused on shareholder value, long-term strategic vision, risk management, and board-level accountability.\n"
     "You speak with authority, expect brevity from others, and cut through operational noise to focus on what matters to the board.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "BOARD PERSPECTIVE:\n<how the board views this task in the context of strategic priorities>\n\n"
     "STRATEGIC CONCERNS:\n<risks, liabilities, or misalignments with corporate strategy>\n\n"
-    "SHAREHOLDER VALUE:\n<how this impacts shareholder value, ROI, and long-term growth>\n\n"
-    "BOARD DIRECTIVE:\n<the board's clear, authoritative recommendation or decision>"
 )
 _MAGA_APPOINTEE_SYSTEM = (
@@ -892,12 +935,13 @@ _MAGA_APPOINTEE_SYSTEM = (
     "You champion deregulation, American jobs, national sovereignty, and cutting government waste.\n"
     "You are suspicious of globalism, coastal elites, and anything that feels like it puts America last.\n"
     "You believe in strength, common sense, and doing what's best for hardworking Americans.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "AMERICA FIRST ANALYSIS:\n<how this task affects American workers, businesses, and national interests>\n\n"
     "DEEP STATE CONCERNS:\n<bureaucratic overreach, globalist agendas, or regulations that hurt Americans>\n\n"
-    "MAKING IT GREAT AGAIN:\n<the common-sense, America First approach that cuts through the nonsense>\n\n"
-    "MAGA DRAFT:\n<the complete output from an unapologetically America First perspective>"
 )
 _LAWYER_SYSTEM = (
@@ -905,12 +949,13 @@ _LAWYER_SYSTEM = (
     "You analyse everything through the lens of legal compliance, liability, contracts, and risk mitigation.\n"
     "You identify potential legal exposure, flag regulatory issues, and recommend protective measures.\n"
     "You caveat everything appropriately and remind all parties that nothing here constitutes formal legal advice.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "LEGAL ANALYSIS:\n<assessment of legal issues, applicable laws, and regulatory considerations>\n\n"
     "LIABILITIES AND RISKS:\n<specific legal exposure, contractual risks, or compliance gaps>\n\n"
-    "LEGAL RECOMMENDATIONS:\n<protective measures, disclaimers, or required legal steps>\n\n"
-    "LEGAL DRAFT:\n<the complete output revised to address legal considerations — note: not formal legal advice>"
 )
@@ -1125,10 +1170,13 @@ def _step_qa(
     trace: List[str],
     all_outputs: Optional[List[Tuple[str, str]]] = None,
     evidence: Optional[EvidenceResult] = None,
 ) -> WorkflowState:
     """QA Tester: validate the draft against the original request, success criteria,
-    output format, brevity requirements, and evidence grounding.
     Produces a structured QAResult stored in state['qa_structured'].
     """
     trace.append("\n╔══ [QA TESTER] Reviewing output... ══╗")
@@ -1150,6 +1198,12 @@ def _step_qa(
     if evidence is not None:
         content += f"{format_evidence_for_qa(evidence)}\n\n"
     if all_outputs:
         content += "Individual specialist contributions:\n\n"
         for r_key, r_output in all_outputs:
@@ -1164,6 +1218,30 @@ def _step_qa(
     # Parse structured QA result
     qa_result = parse_structured_qa(text)
     state["qa_structured"] = qa_result.to_dict()
     state["qa_passed"] = qa_result.passed
@@ -1580,18 +1658,16 @@ def _step_synthesize(
     trace: List[str],
     all_outputs: List[Tuple[str, str]],
     evidence: Optional[EvidenceResult] = None,
 ) -> WorkflowState:
     """Synthesizer: produce the final user-facing answer from specialist contributions.
     Obeys the detected output format and brevity requirement strictly.
     If evidence is available, injects it so the synthesizer prefers grounded claims.
     """
     trace.append("\n╔══ [SYNTHESIZER] Producing final answer... ══╗")
-    perspectives = []
-    for r_key, r_output in all_outputs:
-        r_label = AGENT_ROLES.get(r_key, r_key)
-        perspectives.append(f"=== {r_label} ===\n{r_output}")
-    combined = "\n\n".join(perspectives)
     # Build format-aware instructions
     fmt = state.get("output_format", "other")
@@ -1611,12 +1687,41 @@ def _step_synthesize(
             f"{format_evidence_for_prompt(evidence)}\n\n"
         )
-    content += f"Specialist contributions:\n\n{combined}"
     text = _llm_call(chat_model, _SYNTHESIZER_SYSTEM, content)
     state["synthesis_output"] = text
-    state["draft_output"] = text
-    trace.append(text)
     trace.append("╚══ [SYNTHESIZER] Done ══╝")
     return state
@@ -1662,6 +1767,7 @@ _EMPTY_STATE_BASE: WorkflowState = {
     "revision_count": 0, "final_answer": "",
     "output_format": "other", "brevity_requirement": "normal", "qa_structured": None,
     "task_assumptions": {}, "revision_instruction": "",
 }
@@ -1930,6 +2036,8 @@ def run_multi_role_workflow(
         "qa_structured": None,
         "task_assumptions": {},
         "revision_instruction": "",
     }
     trace: List[str] = [
@@ -2033,6 +2141,13 @@ def run_multi_role_workflow(
         primary_output = state["draft_output"]
         planner_state.specialist_outputs[primary_role] = primary_output[:500]
         all_outputs: List[Tuple[str, str]] = [(primary_role, primary_output)]
         for specialist_role in selected_roles:
             if specialist_role == primary_role:
@@ -2041,10 +2156,24 @@ def run_multi_role_workflow(
             output = state["draft_output"]
             all_outputs.append((specialist_role, output))
             planner_state.specialist_outputs[specialist_role] = output[:500]
-        # Step 5: Synthesize — format-aware, evidence-grounded
         state = _step_synthesize(chat_model, state, trace, all_outputs,
-                                 evidence=evidence)
         # Step 5b: Pre-QA format validation — catch structural violations early
         fmt_violations = validate_output_format(
@@ -2059,7 +2188,8 @@ def run_multi_role_workflow(
             violation_instr = format_violations_instruction(fmt_violations)
             state["plan"] = state["plan"] + "\n\n" + violation_instr
             state = _step_synthesize(chat_model, state, trace, all_outputs,
-                                     evidence=evidence)
             planner_state.record_event("format_rewrite", "; ".join(fmt_violations))
             trace.append("[FORMAT VALIDATION] Re-synthesized to fix format violations.")
@@ -2069,7 +2199,8 @@ def run_multi_role_workflow(
             # Step 6: QA validation (with evidence context)
             if qa_active:
                 state = _step_qa(chat_model, state, trace, all_outputs,
-                                 evidence=evidence)
             else:
                 state["qa_passed"] = True
                 state["qa_report"] = "QA Tester is disabled — skipping quality review."
@@ -2169,6 +2300,12 @@ def run_multi_role_workflow(
                         state = _run_specialist(rk)
                         new_outputs.append((rk, state["draft_output"]))
                         planner_state.specialist_outputs[rk] = state["draft_output"][:500]
                     # Merge: replace updated roles, keep others unchanged
                     updated_keys = {rk for rk, _ in new_outputs}
@@ -2176,9 +2313,15 @@ def run_multi_role_workflow(
                         (rk, out) for rk, out in all_outputs if rk not in updated_keys
                     ] + new_outputs
                 if rerun_synthesizer or rerun_specialists:
                     state = _step_synthesize(chat_model, state, trace, all_outputs,
-                                             evidence=evidence)
                     # Post-revision format validation
                     fmt_violations = validate_output_format(
@@ -2192,7 +2335,8 @@ def run_multi_role_workflow(
                         violation_instr = format_violations_instruction(fmt_violations)
                         state["plan"] = state["plan"] + "\n\n" + violation_instr
                         state = _step_synthesize(chat_model, state, trace, all_outputs,
-                                                 evidence=evidence)
                 # Loop back to QA — NOT back to specialists
                 continue

     WorkflowConfig, DEFAULT_CONFIG,
     detect_output_format, detect_brevity_requirement,
     classify_task, task_needs_evidence,
+    QAResult, parse_structured_qa, QAIssue,
     PlannerState, FailureRecord,
     select_relevant_roles, identify_revision_targets,
     compress_final_answer, strip_internal_noise,
     validate_output_format, format_violations_instruction,
     parse_task_assumptions, format_assumptions_for_prompt,
     ROLE_RELEVANCE,
+    STRUCTURED_OUTPUT_SUFFIX,
+    StructuredContribution, parse_structured_contribution,
+    format_contributions_for_synthesizer, format_contributions_for_qa,
+    parse_used_contributions, check_expert_influence,
 )
 from evidence import (
     EvidenceResult, EvidenceItem,
     qa_structured: Optional[dict]  # serialised QAResult for structured QA
     task_assumptions: Dict[str, str]  # shared assumptions all specialists must use
     revision_instruction: str  # latest revision instruction from planner
+    structured_contributions: Dict[str, dict]  # role_key → StructuredContribution.to_dict()
+    used_contributions: Dict[str, List[str]]  # role_key → list of used refs (e.g. ["main_points[0]"])
 # --- Role system prompts ---
 _PLANNER_SYSTEM = (
     "You are the Planner in a strict planner–specialist–synthesizer–QA workflow.\n"
+    "Your ONLY job is to PLAN and DELEGATE. You do NOT write the answer.\n\n"
+    "Your responsibilities:\n"
     "1. Break the user's task into clear subtasks.\n"
     "2. Decide which specialist to call as the PRIMARY lead.\n"
     "   IMPORTANT: Select the FEWEST roles necessary. Do NOT call all roles.\n"
     "   - 'UX Designer' (user needs, usability, accessibility)\n"
     "   - 'Lawyer' (legal compliance, liability, contracts)\n"
     "3. State clear success criteria.\n"
+    "4. Identify the required output format and brevity level.\n"
+    "5. Define shared assumptions that ALL specialists must use.\n"
+    "6. Write delegation instructions (what each specialist should focus on).\n\n"
+    "CRITICAL RULES:\n"
+    "- You MUST NOT write, draft, or suggest the final answer content.\n"
+    "- You MUST NOT include example answers, sample text, or draft responses.\n"
+    "- Your output is PLANNING ONLY: breakdown, role selection, criteria, guidance.\n"
+    "- The specialists will create the content. The Synthesizer will combine it.\n"
     "- For simple questions, ONE specialist is enough.\n"
     "- Never call persona/gimmick roles unless the user explicitly asks for them.\n"
     "- QA results are BINDING — if QA says FAIL, you MUST revise, never approve.\n\n"
     "Respond in this exact format:\n"
+    "TASK BREAKDOWN:\n<subtask list — what needs to be addressed, NOT the answers>\n\n"
     "TASK ASSUMPTIONS:\n<shared assumptions all specialists must use, e.g. cost model, "
     "coverage rate, units, scope, time frame — one per line as 'key: value'>\n\n"
     "ROLE TO CALL: <specialist name>\n\n"
     "SUCCESS CRITERIA:\n<what a correct, complete answer looks like>\n\n"
+    "GUIDANCE FOR SPECIALIST:\n<delegation instructions — what to focus on, NOT answer content>"
 )
 _CREATIVE_SYSTEM = (
     "You are the Creative Expert in a multi-role AI workflow.\n"
     "You handle brainstorming, alternative ideas, framing, wording, and concept generation.\n"
+    "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "IDEAS:\n<list of ideas and alternatives>\n\n"
+    "RATIONALE:\n<why these are strong choices>"
+    + STRUCTURED_OUTPUT_SUFFIX
 )
 _TECHNICAL_SYSTEM = (
     "You are the Technical Expert in a multi-role AI workflow.\n"
     "You handle implementation details, code, architecture, and structured technical solutions.\n"
+    "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "TECHNICAL APPROACH:\n<recommended approach>\n\n"
+    "IMPLEMENTATION NOTES:\n<key details, steps, and caveats>"
+    + STRUCTURED_OUTPUT_SUFFIX
 )
 _QA_SYSTEM = (
     "You are the QA Tester in a strict planner–specialist–synthesizer–QA workflow.\n"
     "Check whether the output satisfies the original request, success criteria,\n"
+    "output format requirements, brevity requirements, AND expert influence.\n\n"
     "You MUST respond with a JSON object in this exact structure:\n"
     '{\n'
     '  \"status\": \"PASS\" or \"FAIL\",\n'
     '  \"reason\": \"short explanation\",\n'
     '  \"issues\": [\n'
     '    {\n'
+    '      \"type\": \"format\" | \"brevity\" | \"constraint\" | \"consistency\" | \"directness\" | \"evidence\" | \"expert_influence\" | \"other\",\n'
     '      \"message\": \"what is wrong\",\n'
     '      \"owner\": \"Synthesizer\" | \"Planner\" | \"Research Analyst\" | \"<specialist role name>\"\n'
     '    }\n'
     "- EVIDENCE CHECK: If evidence validation info is provided, FAIL any answer that includes\n"
     "  specific factual claims, case studies, named examples, or citations NOT backed by the\n"
     "  retrieved evidence. General knowledge and widely-known facts are acceptable.\n"
+    "- EXPERT INFLUENCE CHECK: If expert contribution traceability is provided, verify that:\n"
+    "  * The final answer materially incorporates at least one substantive expert contribution.\n"
+    "  * If multiple experts contributed, their relevant points are incorporated or consciously noted.\n"
+    "  * The answer is NOT just a paraphrase of planner text with no expert content.\n"
+    "  * FAIL with type 'expert_influence' if expert contributions were ignored.\n"
     "- FAIL if any of the above checks fail.\n"
     "- PASS only if ALL checks pass.\n"
 )
 _RESEARCH_SYSTEM = (
     "You are the Research Analyst in a multi-role AI workflow.\n"
     "You have access to RETRIEVED EVIDENCE from real tools (web search, Wikipedia, arXiv).\n"
+    "Your job is to summarize the retrieved evidence, NOT to invent facts.\n"
+    "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.\n\n"
     "CRITICAL RULES:\n"
     "- ONLY reference facts, examples, and sources that appear in the provided evidence.\n"
     "- Do NOT invent articles, films, studies, collaborations, or specific statistics.\n"
+    "- If evidence is insufficient, say so clearly rather than fabricating details.\n\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "EVIDENCE SUMMARY:\n<what the retrieved evidence shows>\n\n"
     "KEY FINDINGS:\n<factual information from the evidence, with source attribution>\n\n"
     "GAPS:\n<what could not be verified — if any>"
+    + STRUCTURED_OUTPUT_SUFFIX
 )
 _SECURITY_SYSTEM = (
     "You are the Security Reviewer in a multi-role AI workflow.\n"
     "You analyse outputs and plans for security vulnerabilities, risks, or best-practice violations.\n"
+    "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.\n"
     "Keep your response brief — 2-3 sentences per section maximum.\n\n"
     "Respond in this exact format:\n"
     "SECURITY ANALYSIS:\n<identification of potential security concerns or risks>\n\n"
     "VULNERABILITIES FOUND:\n<specific vulnerabilities or risks — or 'None' if the output is secure>\n\n"
+    "RECOMMENDATIONS:\n<specific security improvements and mitigations>"
+    + STRUCTURED_OUTPUT_SUFFIX
 )
 # System prompt for the Data Analyst role: role framing, brevity rule, three
 # required response sections, then the shared structured-output suffix.
 _DATA_ANALYST_SYSTEM = "\n".join([
     "You are the Data Analyst in a multi-role AI workflow.",
     "You analyse data, identify patterns, compute statistics, and provide actionable insights.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "DATA OVERVIEW:",
     "<description of the data or problem being analysed>",
     "",
     "ANALYSIS:",
     "<key patterns, statistics, or calculations>",
     "",
     "INSIGHTS:",
     "<actionable conclusions drawn from the analysis>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (mad professor): persona description, brevity rule, three
 # required response sections, then the shared structured-output suffix.
 _MAD_PROFESSOR_SYSTEM = "\n".join([
     "You propose radical, groundbreaking, and outlandish scientific hypotheses with total conviction.",
     "You ignore convention, laugh at 'impossible', and speculate wildly about paradigm-shattering discoveries.",
     "Cost, practicality, and peer review are irrelevant — only the science matters, and the more extreme the better.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "WILD HYPOTHESIS:",
     "<the most extreme, unhinged scientific theory relevant to the task>",
     "",
     "SCIENTIFIC RATIONALE:",
     "<fringe evidence, speculative mechanisms, and radical extrapolations that 'support' the hypothesis>",
     "",
     "GROUNDBREAKING IMPLICATIONS:",
     "<what this revolutionary theory changes about everything we know>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (cost-obsessed accountant): persona description, brevity rule,
 # three required response sections, then the shared structured-output suffix.
 _ACCOUNTANT_SYSTEM = "\n".join([
     "You are obsessively, ruthlessly focused on minimising costs above all else.",
     "You question every expense, demand the cheapest possible alternative for everything, and treat cost reduction as the supreme priority — regardless of quality, user experience, or outcome.",
     "You view every suggestion through the lens of 'can this be done cheaper?' and the answer is always yes.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "COST ANALYSIS:",
     "<breakdown of every cost element and how outrageously expensive it is>",
     "",
     "COST-CUTTING MEASURES:",
     "<extreme measures to eliminate or slash each cost, including free/DIY alternatives>",
     "",
     "CHEAPEST VIABLE APPROACH:",
     "<the absolute rock-bottom solution that technically meets the minimum requirement>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (artist): persona description, brevity rule, three required
 # response sections, then the shared structured-output suffix.
 _ARTIST_SYSTEM = "\n".join([
     "You propose ideas so creatively extreme that they transcend practicality, cost, and conventional logic entirely.",
     "You think in metaphors, sensations, dreams, and universal vibrations. Implementation is someone else's problem.",
     "The more otherworldly, poetic, and mind-expanding the idea, the better.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "COSMIC VISION:",
     "<the wildest, most unhinged creative concept imaginable for this task>",
     "",
     "FEELING AND VIBES:",
     "<the emotional energy, sensory experience, and cosmic resonance this idea evokes>",
     "",
     "WILD STORM OF IDEAS:",
     "<a torrent of unfiltered, boundary-breaking creative ideas, each more extreme than the last>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (lazy slacker): persona description, brevity rule, three
 # required response sections, then the shared structured-output suffix.
 _LAZY_SLACKER_SYSTEM = "\n".join([
     "You look for shortcuts, copy-paste solutions, things that are 'good enough', and any excuse to do less.",
     "You question whether anything needs to be done at all, and if it does, you find the laziest way to do it.",
     "Effort is the enemy. Why do it properly when you can barely do it?",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "DO WE EVEN NEED TO DO THIS:",
     "<reasons why this might not be worth doing at all>",
     "",
     "MINIMUM VIABLE EFFORT:",
     "<the absolute bare minimum that could technically count as doing something>",
     "",
     "SOMEONE ELSE'S PROBLEM:",
     "<parts of this task that can be delegated, ignored, or pushed off indefinitely>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (black metal fundamentalist): persona description, brevity
 # rule, three required response sections, then the shared structured-output
 # suffix.
 _BLACK_METAL_FUNDAMENTALIST_SYSTEM = "\n".join([
     "You are outspoken, fearless, and hold nothing back in your contempt for compromise and mediocrity.",
     "True solutions are raw, grim, underground, and uncompromising. Anything else is a sellout.",
     "You see most proposed solutions as weak, commercialised garbage dressed up in false sophistication.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "KVLT VERDICT:",
     "<uncompromising judgement on the task — is it true or false, grim or poseur?>",
     "",
     "WHAT THE MAINSTREAM GETS WRONG:",
     "<brutal critique of conventional approaches to this problem>",
     "",
     "THE GRIM TRUTH:",
     "<the raw, unvarnished, nihilistic reality of the situation>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 _SYNTHESIZER_SYSTEM = (
     "You are the Synthesizer in a strict planner–specialist–synthesizer–QA workflow.\n"
+    "You receive STRUCTURED EXPERT CONTRIBUTIONS and must produce the FINAL answer.\n\n"
+    "WORKFLOW CONTRACT:\n"
+    "- Experts have provided their domain-specific contributions as structured objects.\n"
+    "- You MUST build the final answer FROM these expert contributions.\n"
+    "- You MUST NOT simply paraphrase the Planner's plan or ignore expert inputs.\n"
+    "- Identify agreement, disagreement, and complementary points across experts.\n"
+    "- The final answer should reflect the substantive work of the experts.\n\n"
     "CRITICAL RULES:\n"
     "- Your output IS the final user-facing answer. It must directly answer the user's question.\n"
     "- You MUST obey the requested output format strictly.\n"
     "- Default to the SHORTEST adequate answer.\n"
     "- EVIDENCE RULE: Prefer claims backed by retrieved evidence. If evidence is weak or\n"
     "  absent, give a general answer. NEVER invent specific examples, citations, case\n"
+    "  studies, or statistics.\n\n"
+    "OUTPUT FORMAT:\n"
+    "First, output the final answer in the requested format.\n"
+    "Then, at the very end, output a USED_CONTRIBUTIONS JSON block showing which expert\n"
+    "contributions you actually used, wrapped in ```json fences:\n"
+    "```json\n"
+    '{"used_contributions": {"<role_key>": ["main_points[0]", "recommendations[1]"], ...}}\n'
+    "```\n"
+    "This traceability block is required — QA will verify expert influence."
 )
 # Persona prompt (labour union representative): persona description, brevity
 # rule, three required response sections, then the shared structured-output
 # suffix.
 _LABOUR_UNION_REP_SYSTEM = "\n".join([
     "You champion worker rights, fair wages, job security, safe working conditions, and collective bargaining.",
     "You are vigilant about proposals that could exploit workers, cut jobs, or undermine union agreements.",
     "You speak up for the workforce and push back on decisions that prioritise profit over people.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "WORKER IMPACT:",
     "<how this task or proposal affects workers and their livelihoods>",
     "",
     "UNION CONCERNS:",
     "<specific risks to worker rights, wages, safety, or job security>",
     "",
     "COLLECTIVE BARGAINING POSITION:",
     "<what the union demands or recommends to protect workers>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Prompt for the UX Designer role: role description, brevity rule, three
 # required response sections, then the shared structured-output suffix.
 _UX_DESIGNER_SYSTEM = "\n".join([
     "You focus exclusively on user needs, user-centricity, usability, accessibility, and intuitive design.",
     "You empathise deeply with end users, question assumptions, and push for simplicity and clarity.",
     "You advocate for the user at every step, even when it conflicts with technical or business constraints.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "USER NEEDS ANALYSIS:",
     "<who the users are and what they actually need from this>",
     "",
     "PAIN POINTS:",
     "<friction, confusion, or barriers users will face with current approaches>",
     "",
     "UX RECOMMENDATIONS:",
     "<specific design improvements to make the experience intuitive and user-friendly>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (Doris): persona description, brevity rule, three required
 # response sections, then the shared structured-output suffix. Note the
 # "DOMAIN EXPERTISE" line is worded differently from the other prompts.
 _DORIS_SYSTEM = "\n".join([
     "You do not know anything about anything, but that has never stopped you from having plenty to say.",
     "You go off on tangents, bring up completely unrelated topics, and make confident observations that miss the point entirely.",
     "You are well-meaning but utterly clueless. You fill every section with irrelevant words.",
     "Your job is to contribute your DOMAIN EXPERTISE (such as it is), not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "WHAT DORIS THINKS IS HAPPENING:",
     "<Doris's completely off-base interpretation of the task>",
     "",
     "DORIS'S THOUGHTS:",
     "<loosely related observations, a personal anecdote, and a non-sequitur>",
     "",
     "ANYWAY:",
     "<an abrupt change of subject to something entirely unrelated>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (chairman of the board): persona description, brevity rule,
 # three required response sections, then the shared structured-output suffix.
 _CHAIRMAN_SYSTEM = "\n".join([
     "You represent the highest level of corporate governance, fiduciary duty, and strategic oversight.",
     "You are focused on shareholder value, long-term strategic vision, risk management, and board-level accountability.",
     "You speak with authority, expect brevity from others, and cut through operational noise to focus on what matters to the board.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "BOARD PERSPECTIVE:",
     "<how the board views this task in the context of strategic priorities>",
     "",
     "STRATEGIC CONCERNS:",
     "<risks, liabilities, or misalignments with corporate strategy>",
     "",
     "SHAREHOLDER VALUE:",
     "<how this impacts shareholder value, ROI, and long-term growth>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Persona prompt (MAGA appointee): persona description, brevity rule, three
 # required response sections, then the shared structured-output suffix.
 _MAGA_APPOINTEE_SYSTEM = "\n".join([
     "You champion deregulation, American jobs, national sovereignty, and cutting government waste.",
     "You are suspicious of globalism, coastal elites, and anything that feels like it puts America last.",
     "You believe in strength, common sense, and doing what's best for hardworking Americans.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "AMERICA FIRST ANALYSIS:",
     "<how this task affects American workers, businesses, and national interests>",
     "",
     "DEEP STATE CONCERNS:",
     "<bureaucratic overreach, globalist agendas, or regulations that hurt Americans>",
     "",
     "MAKING IT GREAT AGAIN:",
     "<the common-sense, America First approach that cuts through the nonsense>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
 # Prompt for the Lawyer role: role description, brevity rule, three required
 # response sections, then the shared structured-output suffix.
 _LAWYER_SYSTEM = "\n".join([
     "You analyse everything through the lens of legal compliance, liability, contracts, and risk mitigation.",
     "You identify potential legal exposure, flag regulatory issues, and recommend protective measures.",
     "You caveat everything appropriately and remind all parties that nothing here constitutes formal legal advice.",
     "Your job is to contribute your DOMAIN EXPERTISE, not to write the final answer.",
     "Keep your response brief — 2-3 sentences per section maximum.",
     "",
     "Respond in this exact format:",
     "LEGAL ANALYSIS:",
     "<assessment of legal issues, applicable laws, and regulatory considerations>",
     "",
     "LIABILITIES AND RISKS:",
     "<specific legal exposure, contractual risks, or compliance gaps>",
     "",
     "LEGAL RECOMMENDATIONS:",
     "<protective measures, disclaimers, or required legal steps>",
 ]) + STRUCTURED_OUTPUT_SUFFIX
     trace: List[str],
     all_outputs: Optional[List[Tuple[str, str]]] = None,
     evidence: Optional[EvidenceResult] = None,
+    structured_contributions: Optional[Dict[str, StructuredContribution]] = None,
 ) -> WorkflowState:
     """QA Tester: validate the draft against the original request, success criteria,
+    output format, brevity requirements, evidence grounding, and expert influence.
+    When structured_contributions are provided, also checks that the final answer
+    materially incorporates expert contributions (expert_influence check).
     Produces a structured QAResult stored in state['qa_structured'].
     """
     trace.append("\n╔══ [QA TESTER] Reviewing output... ══╗")
     if evidence is not None:
         content += f"{format_evidence_for_qa(evidence)}\n\n"
+    # Inject expert contribution traceability for influence checking
+    if structured_contributions:
+        used = state.get("used_contributions", {})
+        traceability = format_contributions_for_qa(structured_contributions, used)
+        content += f"{traceability}\n\n"
     if all_outputs:
         content += "Individual specialist contributions:\n\n"
         for r_key, r_output in all_outputs:
     # Parse structured QA result
     qa_result = parse_structured_qa(text)
+    # Code-level expert influence check — append issues if contributions were ignored
+    if structured_contributions:
+        used = state.get("used_contributions", {})
+        influence_issues = check_expert_influence(
+            structured_contributions, used, state["draft_output"]
+        )
+        if influence_issues:
+            for issue_msg in influence_issues:
+                qa_result.issues.append(QAIssue(
+                    issue_type="expert_influence",
+                    message=issue_msg,
+                    owner="synthesizer",
+                ))
+            if qa_result.passed:
+                qa_result.status = "FAIL"
+                qa_result.reason = (
+                    qa_result.reason + " Expert influence check failed."
+                    if qa_result.reason else "Expert influence check failed."
+                )
+            trace.append(
+                f"  ⚠ Expert influence issues: {'; '.join(influence_issues)}"
+            )
     state["qa_structured"] = qa_result.to_dict()
     state["qa_passed"] = qa_result.passed
     trace: List[str],
     all_outputs: List[Tuple[str, str]],
     evidence: Optional[EvidenceResult] = None,
+    structured_contributions: Optional[Dict[str, StructuredContribution]] = None,
 ) -> WorkflowState:
     """Synthesizer: produce the final user-facing answer from specialist contributions.
+    When structured_contributions are provided, the synthesizer receives indexed
+    contribution data and must produce a USED_CONTRIBUTIONS traceability block.
     Obeys the detected output format and brevity requirement strictly.
     If evidence is available, injects it so the synthesizer prefers grounded claims.
     """
     trace.append("\n╔══ [SYNTHESIZER] Producing final answer... ══╗")
     # Build format-aware instructions
     fmt = state.get("output_format", "other")
             f"{format_evidence_for_prompt(evidence)}\n\n"
         )
+    # Prefer structured contributions when available
+    if structured_contributions:
+        formatted = format_contributions_for_synthesizer(structured_contributions)
+        content += formatted
+    else:
+        # Fallback: raw specialist outputs
+        perspectives = []
+        for r_key, r_output in all_outputs:
+            r_label = AGENT_ROLES.get(r_key, r_key)
+            perspectives.append(f"=== {r_label} ===\n{r_output}")
+        content += f"Specialist contributions:\n\n" + "\n\n".join(perspectives)
     text = _llm_call(chat_model, _SYNTHESIZER_SYSTEM, content)
+    # Parse used_contributions traceability from synthesizer output
+    used = parse_used_contributions(text)
+    state["used_contributions"] = used
+    # Strip the USED_CONTRIBUTIONS JSON block from the draft (user shouldn't see it)
+    draft = re.sub(
+        r"\n*USED_CONTRIBUTIONS:\s*```json.*?```",
+        "", text, flags=re.DOTALL,
+    ).strip()
+    # Also strip any standalone ```json block at the end that contains used_contributions
+    draft = re.sub(
+        r"\n*```json\s*\{[^}]*\"used_contributions\"[^}]*\}\s*```\s*$",
+        "", draft, flags=re.DOTALL,
+    ).strip()
     state["synthesis_output"] = text
+    state["draft_output"] = draft
+    trace.append(draft[:500] + ("…" if len(draft) > 500 else ""))
+    if used:
+        used_count = sum(len(v) for v in used.values())
+        trace.append(f"  ℹ Traceability: {used_count} expert contribution(s) referenced")
     trace.append("╚══ [SYNTHESIZER] Done ══╝")
     return state
     "revision_count": 0, "final_answer": "",
     "output_format": "other", "brevity_requirement": "normal", "qa_structured": None,
     "task_assumptions": {}, "revision_instruction": "",
+    "structured_contributions": {}, "used_contributions": {},
 }
         "qa_structured": None,
         "task_assumptions": {},
         "revision_instruction": "",
+        "structured_contributions": {},
+        "used_contributions": {},
     }
     trace: List[str] = [
         primary_output = state["draft_output"]
         planner_state.specialist_outputs[primary_role] = primary_output[:500]
+        # Parse structured contribution from specialist output
+        structured_contributions: Dict[str, StructuredContribution] = {}
+        contrib = parse_structured_contribution(
+            primary_output, AGENT_ROLES.get(primary_role, primary_role)
+        )
+        structured_contributions[primary_role] = contrib
         all_outputs: List[Tuple[str, str]] = [(primary_role, primary_output)]
         for specialist_role in selected_roles:
             if specialist_role == primary_role:
             output = state["draft_output"]
             all_outputs.append((specialist_role, output))
             planner_state.specialist_outputs[specialist_role] = output[:500]
+            # Parse structured contribution
+            contrib = parse_structured_contribution(
+                output, AGENT_ROLES.get(specialist_role, specialist_role)
+            )
+            structured_contributions[specialist_role] = contrib
+        # Store structured contributions in state
+        state["structured_contributions"] = {
+            k: v.to_dict() for k, v in structured_contributions.items()
+        }
+        trace.append(
+            f"\n[CONTRIBUTIONS] {len(structured_contributions)} structured contribution(s) parsed"
+        )
+        # Step 5: Synthesize — format-aware, evidence-grounded, contribution-driven
         state = _step_synthesize(chat_model, state, trace, all_outputs,
+                                 evidence=evidence,
+                                 structured_contributions=structured_contributions)
         # Step 5b: Pre-QA format validation — catch structural violations early
         fmt_violations = validate_output_format(
             violation_instr = format_violations_instruction(fmt_violations)
             state["plan"] = state["plan"] + "\n\n" + violation_instr
             state = _step_synthesize(chat_model, state, trace, all_outputs,
+                                     evidence=evidence,
+                                     structured_contributions=structured_contributions)
             planner_state.record_event("format_rewrite", "; ".join(fmt_violations))
             trace.append("[FORMAT VALIDATION] Re-synthesized to fix format violations.")
             # Step 6: QA validation (with evidence context)
             if qa_active:
                 state = _step_qa(chat_model, state, trace, all_outputs,
+                                 evidence=evidence,
+                                 structured_contributions=structured_contributions)
             else:
                 state["qa_passed"] = True
                 state["qa_report"] = "QA Tester is disabled — skipping quality review."
                         state = _run_specialist(rk)
                         new_outputs.append((rk, state["draft_output"]))
                         planner_state.specialist_outputs[rk] = state["draft_output"][:500]
+                        # Re-parse structured contribution for rerun specialist
+                        contrib = parse_structured_contribution(
+                            state["draft_output"],
+                            AGENT_ROLES.get(rk, rk),
+                        )
+                        structured_contributions[rk] = contrib
                     # Merge: replace updated roles, keep others unchanged
                     updated_keys = {rk for rk, _ in new_outputs}
                         (rk, out) for rk, out in all_outputs if rk not in updated_keys
                     ] + new_outputs
+                    # Update state with revised structured contributions
+                    state["structured_contributions"] = {
+                        k: v.to_dict() for k, v in structured_contributions.items()
+                    }
                 if rerun_synthesizer or rerun_specialists:
                     state = _step_synthesize(chat_model, state, trace, all_outputs,
+                                             evidence=evidence,
+                                             structured_contributions=structured_contributions)
                     # Post-revision format validation
                     fmt_violations = validate_output_format(
                         violation_instr = format_violations_instruction(fmt_violations)
                         state["plan"] = state["plan"] + "\n\n" + violation_instr
                         state = _step_synthesize(chat_model, state, trace, all_outputs,
+                                                 evidence=evidence,
+                                                 structured_contributions=structured_contributions)
                 # Loop back to QA — NOT back to specialists
                 continue

test_workflow.py CHANGED Viewed

@@ -39,6 +39,12 @@ from workflow_helpers import (
     format_violations_instruction,
     parse_task_assumptions,
     format_assumptions_for_prompt,
 )
 from evidence import (
     EvidenceItem,
@@ -1284,5 +1290,278 @@ class TestTaskAwareScenarios(unittest.TestCase):
         self.assertLessEqual(len(roles), 3)
 if __name__ == "__main__":
     unittest.main()

     format_violations_instruction,
     parse_task_assumptions,
     format_assumptions_for_prompt,
+    StructuredContribution,
+    parse_structured_contribution,
+    format_contributions_for_synthesizer,
+    format_contributions_for_qa,
+    parse_used_contributions,
+    check_expert_influence,
 )
 from evidence import (
     EvidenceItem,
         self.assertLessEqual(len(roles), 3)
+# ============================================================
+# Structured Contribution Tests
+# ============================================================
class TestStructuredContribution(unittest.TestCase):
    """Exercise the StructuredContribution dataclass and parse_structured_contribution."""

    def test_parse_json_block(self):
        """A fenced ```json block embedded in prose is parsed field by field."""
        specialist_output = "\n".join([
            "Here is my analysis:",
            "",
            "```json",
            "{",
            '  "role": "Technical Expert",',
            '  "main_points": ["Use microservices", "Deploy on k8s"],',
            '  "recommendations": ["Start with a monolith"],',
            '  "evidence": ["Netflix migrated successfully"],',
            '  "assumptions": ["Team has cloud experience"],',
            '  "confidence": "high"',
            "}",
            "```",
            "",  # trailing newline after the closing fence
        ])
        parsed = parse_structured_contribution(specialist_output, "Technical Expert")

        self.assertEqual(parsed.role, "Technical Expert")
        self.assertEqual(len(parsed.main_points), 2)
        self.assertIn("Use microservices", parsed.main_points)
        self.assertEqual(parsed.recommendations, ["Start with a monolith"])
        self.assertEqual(parsed.confidence, "high")
        self.assertTrue(parsed.has_substance())

    def test_parse_bare_json(self):
        """A JSON object with no code fences around it is still parsed."""
        bare_object = (
            '{"role": "Creative Expert", "main_points": ["Be bold"], '
            '"recommendations": [], "evidence": [], "assumptions": [], '
            '"confidence": "medium"}'
        )
        parsed = parse_structured_contribution(bare_object, "Creative Expert")

        self.assertEqual(parsed.main_points, ["Be bold"])
        self.assertEqual(parsed.confidence, "medium")

    def test_parse_fallback_heuristic(self):
        """With no JSON present, something is still extracted from section headers."""
        sectioned_output = "\n".join([
            "IDEAS:",
            "- Go viral on social media",
            "- Partner with influencers",
            "",
            "RECOMMENDATIONS:",
            "- Allocate budget for ads",
            "",
        ])
        parsed = parse_structured_contribution(sectioned_output, "Creative Expert")

        self.assertEqual(parsed.role, "Creative Expert")
        # At least one of the two lists must have been populated by the heuristic.
        extracted_total = len(parsed.main_points) + len(parsed.recommendations)
        self.assertGreater(extracted_total, 0)

    def test_parse_malformed_json(self):
        """Broken JSON must not raise; role and raw output are still recorded."""
        broken_output = '```json\n{"role": "broken, missing bracket\n```'
        parsed = parse_structured_contribution(broken_output, "Research Analyst")

        self.assertEqual(parsed.role, "Research Analyst")
        self.assertEqual(parsed.raw_output, broken_output)

    def test_has_substance_empty(self):
        """A contribution carrying only a role has no substance."""
        blank = StructuredContribution(role="Test")
        self.assertFalse(blank.has_substance())

    def test_to_dict(self):
        """to_dict exposes the structured fields and leaves out raw_output."""
        serialised = StructuredContribution(
            role="Security",
            main_points=["Input validation required"],
            recommendations=["Use parameterized queries"],
            evidence=["OWASP Top 10"],
            assumptions=["Web application"],
            confidence="high",
        ).to_dict()

        self.assertEqual(serialised["role"], "Security")
        self.assertEqual(len(serialised["main_points"]), 1)
        self.assertEqual(serialised["confidence"], "high")
        self.assertNotIn("raw_output", serialised)
class TestFormatContributions(unittest.TestCase):
    """Exercise format_contributions_for_synthesizer and format_contributions_for_qa."""

    def _make_contributions(self):
        """Return a two-expert fixture keyed by role key."""
        creative = StructuredContribution(
            role="Creative Expert",
            main_points=["Bold campaign", "Use humor"],
            recommendations=["A/B test messaging"],
            confidence="high",
        )
        technical = StructuredContribution(
            role="Technical Expert",
            main_points=["Use React"],
            recommendations=["Add caching"],
            evidence=["React has 200k+ stars"],
            confidence="medium",
        )
        return {"creative": creative, "technical": technical}

    def test_format_for_synthesizer(self):
        """Synthesizer formatting shows the header, role names, indexed points and confidence."""
        rendered = format_contributions_for_synthesizer(self._make_contributions())
        expected_fragments = (
            "STRUCTURED EXPERT CONTRIBUTIONS",
            "Creative Expert",
            "Technical Expert",
            "[0] Bold campaign",
            "[0] Use React",
            "confidence: high",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_format_for_synthesizer_empty(self):
        """No contributions renders as the empty string."""
        self.assertEqual(format_contributions_for_synthesizer({}), "")

    def test_format_for_qa_used(self):
        """QA formatting marks used and unused items and carries the traceability header."""
        used_refs = {"creative": ["main_points[0]"], "technical": []}
        rendered = format_contributions_for_qa(self._make_contributions(), used_refs)
        for marker in ("[USED]", "[NOT USED]", "EXPERT CONTRIBUTION TRACEABILITY"):
            self.assertIn(marker, rendered)

    def test_format_for_qa_unused(self):
        """With an empty usage map, nothing is reported as used."""
        rendered = format_contributions_for_qa(self._make_contributions(), {})
        self.assertIn("[NOT USED]", rendered)
        # NOTE(review): this checks "[USED]:" with a trailing colon, while the
        # sibling test looks for "[USED]" — confirm against the formatter output.
        self.assertNotIn("[USED]:", rendered)
class TestParseUsedContributions(unittest.TestCase):
    """Exercise parse_used_contributions."""

    def test_parse_json_block(self):
        """A fenced JSON object with a used_contributions key is unpacked per role."""
        synthesizer_output = "\n".join([
            "Here is the final answer.",
            "",
            "```json",
            '{"used_contributions": {"creative": ["main_points[0]"], "technical": ["recommendations[0]"]}}',
            "```",
            "",
        ])
        parsed = parse_used_contributions(synthesizer_output)

        self.assertIn("creative", parsed)
        self.assertEqual(parsed["creative"], ["main_points[0]"])
        self.assertEqual(parsed["technical"], ["recommendations[0]"])

    def test_parse_used_contributions_section(self):
        """An inline 'USED_CONTRIBUTIONS: {...}' line is accepted as well."""
        synthesizer_output = "\n".join([
            "Great answer here.",
            "",
            'USED_CONTRIBUTIONS: {"creative": ["main_points[0]", "main_points[1]"]}',
            "",
        ])
        parsed = parse_used_contributions(synthesizer_output)

        self.assertIn("creative", parsed)
        self.assertEqual(len(parsed["creative"]), 2)

    def test_parse_empty(self):
        """Text with no traceability block yields an empty mapping."""
        self.assertEqual(
            parse_used_contributions("No contributions block here."), {}
        )
class TestCheckExpertInfluence(unittest.TestCase):
    """Exercise check_expert_influence."""

    def _make_contributions(self):
        """Return a two-expert fixture keyed by role key."""
        creative = StructuredContribution(
            role="Creative Expert",
            main_points=["Use guerrilla marketing tactics"],
            recommendations=["Target social media"],
            confidence="high",
        )
        technical = StructuredContribution(
            role="Technical Expert",
            main_points=["Implement REST API with caching"],
            recommendations=["Use Redis for sessions"],
            confidence="medium",
        )
        return {"creative": creative, "technical": technical}

    def test_no_contributions_used(self):
        """An empty usage map with a generic answer produces an influence issue."""
        problems = check_expert_influence(
            self._make_contributions(), {}, "Some generic answer."
        )
        self.assertGreater(len(problems), 0)

        lowered = [problem.lower() for problem in problems]
        mentions_no_use = any(
            "not materially" in text or "none were used" in text
            for text in lowered
        )
        self.assertTrue(mentions_no_use)

    def test_adequate_influence(self):
        """Both experts referenced and echoed in the answer yields no issues."""
        used_refs = {
            "creative": ["main_points[0]"],
            "technical": ["main_points[0]"],
        }
        # The answer reuses vocabulary from both experts' main points.
        final_answer = "We recommend guerrilla marketing tactics and implementing a REST API with caching."
        problems = check_expert_influence(
            self._make_contributions(), used_refs, final_answer
        )
        self.assertEqual(problems, [])

    def test_missing_expert(self):
        """An expert left out of the usage map is named in the issues."""
        used_refs = {"creative": ["main_points[0]"]}  # technical deliberately absent
        final_answer = "Use guerrilla marketing tactics for the campaign."
        problems = check_expert_influence(
            self._make_contributions(), used_refs, final_answer
        )
        names_technical = any("Technical Expert" in problem for problem in problems)
        self.assertTrue(names_technical)

    def test_empty_contributions(self):
        """With no contributions at all there is nothing to flag."""
        self.assertEqual(check_expert_influence({}, {}, "Any answer"), [])
class TestNorwegianPromptScenario(unittest.TestCase):
    """Scenario test for a Norwegian-language prompt.

    Prompt: "hva er klokken nå, og når bør jeg legge meg om jeg er en black metal fan?"

    Checks that the prompt gets an accepted task category, that role selection
    includes black_metal_fundamentalist, and that a simulated specialist output
    parses into a structured contribution.
    """

    # The same prompt drives the classification and role-selection tests.
    _PROMPT = "hva er klokken nå, og når bør jeg legge meg om jeg er en black metal fan?"

    def test_classification(self):
        """The prompt is classified into one of the accepted categories."""
        accepted = ("general", "creative", "factual", "opinion", "other")
        self.assertIn(classify_task(self._PROMPT), accepted)

    def test_role_selection_includes_black_metal(self):
        """With persona roles allowed, the black metal persona is selected."""
        candidate_roles = [
            "creative", "technical", "research", "security", "data_analyst",
            "mad_professor", "accountant", "artist", "lazy_slacker",
            "black_metal_fundamentalist", "labour_union_rep", "ux_designer",
            "doris", "chairman_of_board", "maga_appointee", "lawyer",
        ]
        persona_config = WorkflowConfig(
            strict_mode=True,
            allow_persona_roles=True,
            max_specialists_per_task=5,
        )
        category = classify_task(self._PROMPT)
        chosen = select_relevant_roles(
            self._PROMPT, candidate_roles, persona_config, task_category=category
        )
        self.assertIn(
            "black_metal_fundamentalist",
            chosen,
            "black_metal_fundamentalist should be selected for a prompt mentioning 'black metal fan'",
        )

    def test_structured_contribution_parsing_from_black_metal_output(self):
        """Simulated persona output (sections plus fenced JSON) parses into a contribution."""
        persona_output = "\n".join([
            "KVLT VERDICT:",
            "The true kvltist sleeps when the moon commands. Bedtime is for posers who follow society's weak schedules.",
            "",
            "THE GRIM TRUTH:",
            "Time is an illusion created by the false light of day.",
            "",
            "```json",
            "{",
            '  "role": "Black Metal Fundamentalist",',
            '  "main_points": [',
            '    "True kvltists sleep only when the moon commands",',
            '    "Bedtime schedules are for posers and conformists"',
            "  ],",
            '  "recommendations": [',
            '    "Sleep at dawn, rise at dusk — embrace the nocturnal path"',
            "  ],",
            '  "evidence": [',
            '    "Norwegian black metal musicians are known for nocturnal lifestyles"',
            "  ],",
            '  "assumptions": [',
            '    "The user seeks the true kvlt path, not mainstream advice"',
            "  ],",
            '  "confidence": "high"',
            "}",
            "```",
            "",  # trailing newline after the closing fence
        ])
        parsed = parse_structured_contribution(
            persona_output, "Black Metal Fundamentalist"
        )

        self.assertEqual(parsed.role, "Black Metal Fundamentalist")
        self.assertEqual(len(parsed.main_points), 2)
        self.assertIn("kvltists", parsed.main_points[0].lower())
        self.assertEqual(len(parsed.recommendations), 1)
        self.assertTrue(parsed.has_substance())
        self.assertEqual(parsed.confidence, "high")
 if __name__ == "__main__":
     unittest.main()

workflow_helpers.py CHANGED Viewed

@@ -1056,3 +1056,306 @@ def format_assumptions_for_prompt(assumptions: Dict[str, str]) -> str:
     for key, value in assumptions.items():
         lines.append(f"  - {key}: {value}")
     return "\n".join(lines)

     for key, value in assumptions.items():
         lines.append(f"  - {key}: {value}")
     return "\n".join(lines)
+# ============================================================
+# Structured Expert Contributions
+# ============================================================
+# Suffix appended to every specialist system prompt to require JSON output.
+# The keys requested below ("role", "main_points", "recommendations",
+# "evidence", "assumptions", "confidence") are the same keys that
+# parse_structured_contribution() reads back out of the specialist's reply,
+# so the prompt text and that parser must be changed together.
+STRUCTURED_OUTPUT_SUFFIX = """
+IMPORTANT — OUTPUT FORMAT:
+After your analysis above, you MUST also output a JSON block at the end of your response,
+wrapped in ```json ... ``` fences, with this exact structure:
+```json
+{
+  "role": "<your role name>",
+  "main_points": ["point 1", "point 2"],
+  "recommendations": ["recommendation 1"],
+  "evidence": ["supporting evidence or examples"],
+  "assumptions": ["assumption 1"],
+  "confidence": "high | medium | low"
+}
+```
+- "main_points": your key substantive contributions to the answer (2-4 points)
+- "recommendations": specific actionable recommendations (0-3)
+- "evidence": facts, data, or examples that support your points (0-3)
+- "assumptions": assumptions you relied on (0-2)
+- "confidence": how confident you are in your contribution
+This JSON block is REQUIRED. The Synthesizer will use it to build the final answer.
+Do NOT write a complete final answer — focus on your domain-specific contribution.
+"""
+@dataclass
+class StructuredContribution:
+    """Structured fields taken from one specialist's output, plus the raw text they came from."""
+    # Name of the specialist role that produced this contribution.
+    role: str
+    # Key substantive points made by the specialist.
+    main_points: List[str] = field(default_factory=list)
+    # Actionable recommendations.
+    recommendations: List[str] = field(default_factory=list)
+    # Supporting facts or examples.
+    evidence: List[str] = field(default_factory=list)
+    # Assumptions the specialist relied on.
+    assumptions: List[str] = field(default_factory=list)
+    # Self-reported confidence; "medium" unless overridden.
+    confidence: str = "medium"
+    # Unparsed specialist text; not included in to_dict().
+    raw_output: str = ""
+    def has_substance(self) -> bool:
+        """Return True when there is at least one main point or recommendation."""
+        if self.main_points:
+            return True
+        return bool(self.recommendations)
+    def to_dict(self) -> dict:
+        """Return the structured fields as a plain dict (``raw_output`` is left out)."""
+        exported = (
+            "role",
+            "main_points",
+            "recommendations",
+            "evidence",
+            "assumptions",
+            "confidence",
+        )
+        return {name: getattr(self, name) for name in exported}
+def _coerce_str_list(value) -> List[str]:
+    """Normalise a decoded JSON value into a list of non-blank strings.
+    A lone string becomes a one-item list; numbers are converted with ``str``;
+    anything else (``None``, dicts, nested lists, booleans) is dropped.
+    """
+    if isinstance(value, str):
+        value = [value]
+    if not isinstance(value, list):
+        return []
+    items: List[str] = []
+    for item in value:
+        # bool is a subclass of int, so it has to be excluded explicitly.
+        if isinstance(item, bool) or not isinstance(item, (str, int, float)):
+            continue
+        as_text = str(item)
+        if as_text.strip():
+            items.append(as_text)
+    return items
+def parse_structured_contribution(text: str, role: str) -> StructuredContribution:
+    """Parse a StructuredContribution from specialist LLM output.
+    Every ```json ... ``` block is tried in order (or, when there is none, a
+    bare JSON object starting with a "role" key). The first candidate that
+    decodes to an object containing at least one expected key is used. Its
+    list fields are coerced to lists of strings so that a ``null`` or a bare
+    string from the model cannot break later ``len()`` / iteration.
+    If no usable JSON is found, falls back to heuristic extraction from
+    section headers via ``_extract_section_points``.
+    Args:
+        text: Full specialist output.
+        role: Role name to use unless the JSON supplies a non-blank one.
+    Returns:
+        The populated contribution; ``raw_output`` always holds ``text``.
+    """
+    contribution = StructuredContribution(role=role, raw_output=text)
+    list_fields = ("main_points", "recommendations", "evidence", "assumptions")
+    expected_keys = list_fields + ("role", "confidence")
+    candidates = [
+        m.group(1)
+        for m in re.finditer(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
+    ]
+    if not candidates:
+        # Also try a bare JSON object
+        bare_match = re.search(r'(\{\s*"role"\s*:.*\})', text, re.DOTALL)
+        if bare_match:
+            candidates.append(bare_match.group(1))
+    for candidate in candidates:
+        try:
+            data = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+        # Skip JSON that is not a contribution object (e.g. an example payload
+        # the specialist quoted earlier in its reply).
+        if not isinstance(data, dict) or not any(k in data for k in expected_keys):
+            continue
+        for name in list_fields:
+            setattr(contribution, name, _coerce_str_list(data.get(name)))
+        confidence = data.get("confidence")
+        if isinstance(confidence, str) and confidence.strip():
+            contribution.confidence = confidence.strip()
+        role_value = data.get("role")
+        if isinstance(role_value, str) and role_value.strip():
+            contribution.role = role_value.strip()
+        return contribution
+    # Fallback: heuristic extraction from section-based output
+    _extract_section_points(text, contribution)
+    return contribution
+def _extract_section_points(text: str, contribution: "StructuredContribution") -> None:
+    """Heuristic fallback: extract key points from section-based specialist output.
+    The text is split on ALL-CAPS header lines (optionally ending in ":").
+    For each header found in ``section_map``, the lines beneath it are
+    stripped of one leading list marker, joined with spaces, truncated to 300
+    characters and appended to the mapped list on ``contribution``. Headers
+    that are not in the map (for example "... DRAFT:" sections) are ignored
+    together with their content.
+    Args:
+        text: Specialist output to scan.
+        contribution: Object whose list attributes are appended to in place.
+    """
+    # Map known section headers to contribution fields
+    section_map = {
+        # Core roles
+        "ideas": "main_points", "rationale": "main_points",
+        "technical approach": "main_points", "implementation notes": "recommendations",
+        "evidence summary": "evidence", "key findings": "evidence",
+        "security analysis": "main_points", "vulnerabilities found": "main_points",
+        "recommendations": "recommendations",
+        "data overview": "main_points", "analysis": "main_points",
+        "insights": "recommendations",
+        # Persona roles
+        "wild hypothesis": "main_points", "scientific rationale": "evidence",
+        "groundbreaking implications": "main_points",
+        "cost analysis": "main_points", "cost-cutting measures": "recommendations",
+        "cosmic vision": "main_points", "wild storm of ideas": "main_points",
+        "minimum viable effort": "main_points",
+        "kvlt verdict": "main_points", "the grim truth": "main_points",
+        "worker impact": "main_points", "union concerns": "main_points",
+        "collective bargaining position": "recommendations",
+        "user needs analysis": "main_points", "pain points": "main_points",
+        "ux recommendations": "recommendations",
+        "what doris thinks is happening": "main_points",
+        "doris's thoughts": "main_points",
+        "board perspective": "main_points", "strategic concerns": "main_points",
+        "shareholder value": "recommendations",
+        "america first analysis": "main_points",
+        "making it great again": "recommendations",
+        "legal analysis": "main_points", "liabilities and risks": "main_points",
+        "legal recommendations": "recommendations",
+    }
+    # Hyphens are allowed so that "COST-CUTTING MEASURES" is recognised as a
+    # header; both the straight and the typographic apostrophe are accepted.
+    header_re = re.compile(r"^([A-Z][A-Z\s'\u2019-]+):?\s*$")
+    # One leading bullet ("-", "*", "•") or numbered marker ("1.", "2)").
+    # Digits that belong to the content ("24/7 support") are left alone.
+    marker_re = re.compile(r"^(?:[•\-*]+|\d+[.)])\s*")
+    def flush(section: str, collected: List[str]) -> None:
+        field_name = section_map.get(section)
+        if not field_name:
+            return
+        combined = " ".join(
+            marker_re.sub("", ln.strip()) for ln in collected if ln.strip()
+        )
+        if combined:
+            getattr(contribution, field_name).append(combined[:300])
+    current_section = ""
+    buffer: List[str] = []
+    for line in text.strip().splitlines():
+        header_match = header_re.match(line.strip())
+        if header_match:
+            flush(current_section, buffer)
+            # Normalise to the lower-case, straight-apostrophe form used as map keys.
+            current_section = header_match.group(1).strip().lower().replace("\u2019", "'")
+            buffer = []
+        elif current_section:
+            buffer.append(line)
+    flush(current_section, buffer)
+def format_contributions_for_synthesizer(
+    contributions: Dict[str, "StructuredContribution"],
+) -> str:
+    """Render structured expert contributions as text for the Synthesizer prompt.
+    Each expert gets a "=== role (confidence: ...) ===" heading followed by
+    its non-empty groups: main points and recommendations as index-numbered
+    items, evidence and assumptions as dash bullets. Returns "" when there
+    are no contributions.
+    """
+    if not contributions:
+        return ""
+    blocks = ["STRUCTURED EXPERT CONTRIBUTIONS:"]
+    for expert in contributions.values():
+        rows = [f"\n=== {expert.role} (confidence: {expert.confidence}) ==="]
+        # Indexed groups: the "[i]" labels are the zero-based positions within each list.
+        for heading, items in (
+            ("Main points:", expert.main_points),
+            ("Recommendations:", expert.recommendations),
+        ):
+            if items:
+                rows.append(heading)
+                rows.extend(f"  [{idx}] {item}" for idx, item in enumerate(items))
+        # Plain bullet groups.
+        for heading, items in (
+            ("Evidence:", expert.evidence),
+            ("Assumptions:", expert.assumptions),
+        ):
+            if items:
+                rows.append(heading)
+                rows.extend(f"  - {item}" for item in items)
+        blocks.append("\n".join(rows))
+    return "\n\n".join(blocks)
+def format_contributions_for_qa(
+    contributions: Dict[str, "StructuredContribution"],
+    used_contributions: Dict[str, List[str]],
+) -> str:
+    """Format contribution data for QA to verify expert influence.
+    Lists every main point and recommendation of every expert, tagged
+    ``USED`` when its reference (e.g. ``main_points[0]``) appears in
+    ``used_contributions`` under the same role key, else ``NOT USED``.
+    The closing summary counts only the items actually tagged ``USED``, so
+    references to unknown roles or non-existent items can no longer push the
+    numerator above the denominator.
+    Args:
+        contributions: Role key -> structured contribution.
+        used_contributions: Role key -> list of references marked as used.
+    Returns:
+        The formatted report, or "" when there are no contributions.
+    """
+    if not contributions:
+        return ""
+    used_contributions = used_contributions or {}
+    parts = ["EXPERT CONTRIBUTION TRACEABILITY:"]
+    used_count = 0
+    total_points = 0
+    for role_key, contrib in contributions.items():
+        # A missing or null entry means nothing from this role was marked as used.
+        used = used_contributions.get(role_key) or []
+        section = [f"\n=== {contrib.role} ==="]
+        for field_name in ("main_points", "recommendations"):
+            for i, item in enumerate(getattr(contrib, field_name) or []):
+                total_points += 1
+                is_used = f"{field_name}[{i}]" in used
+                if is_used:
+                    used_count += 1
+                tag = "USED" if is_used else "NOT USED"
+                section.append(f"  {field_name}[{i}] [{tag}]: {item}")
+        parts.append("\n".join(section))
+    parts.append(f"\nSummary: {used_count}/{total_points} expert contributions marked as used.")
+    return "\n".join(parts)
+def parse_used_contributions(text: str) -> Dict[str, List[str]]:
+    """Parse the Synthesizer's USED_CONTRIBUTIONS JSON block from its output.
+    Lookup order:
+    1. Every ```json ... ``` block, in order; the first one that decodes to
+       an object with a "used_contributions" key wins (earlier, unrelated
+       JSON blocks are skipped rather than ending the search).
+    2. The first JSON object after a literal "USED_CONTRIBUTIONS:" marker,
+       decoded with ``JSONDecoder.raw_decode`` so nested braces are handled;
+       a "used_contributions" wrapper is unwrapped if present.
+    Args:
+        text: Full Synthesizer output.
+    Returns:
+        A dict mapping role_key → list of contribution references
+        like ["main_points[0]", "recommendations[1]"]. Roles whose value is
+        not a list (or a single string) are dropped, and non-string
+        references are filtered out. Returns {} when nothing usable is found.
+    """
+    def _clean(mapping) -> Dict[str, List[str]]:
+        # Keep only the role -> list-of-strings shape callers rely on.
+        if not isinstance(mapping, dict):
+            return {}
+        cleaned: Dict[str, List[str]] = {}
+        for role_key, refs in mapping.items():
+            if isinstance(refs, str):
+                refs = [refs]
+            if not isinstance(refs, list):
+                continue
+            cleaned[str(role_key)] = [ref for ref in refs if isinstance(ref, str)]
+        return cleaned
+    # Look for a ```json block containing "used_contributions"
+    for fenced in re.finditer(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL):
+        try:
+            data = json.loads(fenced.group(1))
+        except json.JSONDecodeError:
+            continue
+        if isinstance(data, dict) and "used_contributions" in data:
+            return _clean(data["used_contributions"])
+    # Look for a USED_CONTRIBUTIONS: section
+    if "USED_CONTRIBUTIONS:" in text:
+        section = text.split("USED_CONTRIBUTIONS:", 1)[1]
+        start = section.find("{")
+        if start != -1:
+            try:
+                # raw_decode parses one complete JSON value and ignores trailing text.
+                data, _end = json.JSONDecoder().raw_decode(section[start:])
+            except json.JSONDecodeError:
+                data = None
+            if isinstance(data, dict):
+                return _clean(data.get("used_contributions", data))
+    return {}
+def check_expert_influence(
+    contributions: Dict[str, "StructuredContribution"],
+    used_contributions: Dict[str, List[str]],
+    final_answer: str,
+) -> List[str]:
+    """Report ways in which the final answer fails to draw on expert contributions.
+    Three checks run in order:
+    1. If substantive points exist but no reference at all is marked as
+       used, a single issue is returned immediately.
+    2. Each expert with substance must have at least one used reference.
+    3. At least one main point must share two or more content words
+       (3+ characters, minus a short stop list) with the final answer,
+       matched as lower-case substrings.
+    Returns:
+        A list of issue messages; an empty list means influence is adequate.
+    """
+    if not contributions:
+        return []
+    problems: List[str] = []
+    # Check 1: is anything marked as used at all?
+    used_total = sum(len(refs) for refs in used_contributions.values())
+    available_total = sum(
+        len(expert.main_points) + len(expert.recommendations)
+        for expert in contributions.values()
+        if expert.has_substance()
+    )
+    if available_total > 0 and used_total == 0:
+        problems.append(
+            "Final answer does not materially incorporate any specialist contributions."
+        )
+        return problems
+    # Check 2: every substantive expert needs at least one used reference.
+    for role_key, expert in contributions.items():
+        if expert.has_substance() and not used_contributions.get(role_key, []):
+            problems.append(
+                f"Expert '{expert.role}' provided substantive points but none were used."
+            )
+    # Check 3: lightweight vocabulary overlap between main points and the answer.
+    stop_words = ("the", "and", "for", "that", "this", "with", "from", "are", "was")
+    haystack = final_answer.lower()
+    reflected_points = 0
+    for expert in contributions.values():
+        for point in expert.main_points:
+            hits = 0
+            for word in re.findall(r"\b\w{3,}\b", point.lower()):
+                if word not in stop_words and word in haystack:
+                    hits += 1
+            if hits >= 2:
+                reflected_points += 1
+    if reflected_points == 0 and available_total > 0:
+        problems.append(
+            "Final answer appears to not reflect expert contribution content."
+        )
+    return problems