diff --git "a/reference_app.py" "b/reference_app.py"
new file mode 100644
--- /dev/null
+++ "b/reference_app.py"
@@ -0,0 +1,2906 @@
+# ============================================================================
+# app.py — Four-backend agent teaching demo (Gradio UI shell)
+# ============================================================================
+#
+# PURPOSE
+# -------
+# A chat-driven Gradio app that demonstrates several backend
+# implementations of the same agent task, side by side: the FOUR core
+# backends listed below, plus any optional extras registered in ZONE 2.
+# This file is the UI SHELL ONLY — it owns the chat, the tabs, the data
+# source loaders, the training panels, the workbench handlers, and the
+# download list. It knows nothing about how any individual backend works;
+# it dispatches through a 4-symbol contract.
+#
+# THE FOUR BACKENDS
+# -----------------
+#   agent_workflow.py   — Workflow: 2-step prompt chain, no tools (raw SDK)
+#   agent_py.py         — Simple Python Agent: tool-calling loop (raw SDK)
+#   agent_langchain.py  — LangChain AgentExecutor with tool calling
+#   agent_langgraph.py  — LangGraph state graph with supervisor + task nodes
+#
+# THE CONTRACT (every backend file exports these four symbols)
+# ------------------------------------------------------------
+#   BACKEND_NAME               — string shown in the UI radio
+#   get_client(api_key)        — returns whatever 'client' the runner needs
+#   run(client, user_message)  — returns {"reply", "steps", "extracted"}
+#   build_code_snippets(user_message, steps) -> str  — for the Code tab
+#
+# Adding a new backend = new file with these four symbols, then one
+# import line in ZONE 2 and a registration into BACKENDS dict. No
+# handler, UI, or wiring changes.
+#
+# GRACEFUL DEGRADATION
+# --------------------
+# Every backend module is imported inside its own try/except in ZONE 2.
+# If a backend's dependencies (e.g. langchain / langchain-mistralai /
+# langgraph) are not installed, that mode is left out of the radio at
+# startup and a warning is printed to the console. The app keeps running
+# with whichever backends did load.
+#
+# CODE ORGANIZATION
+# -----------------
+#   ZONE 1: Imports & constants
+#   ZONE 2: Backend imports + helpers (save_json_artifact, build_outputs, ...)
+#   ZONE 3: Action handlers (wired to UI buttons)
+#   ZONE 4: UI definition (gr.Blocks)
+#   ZONE 5: Event wiring (.click handlers — the glue)
+#
+# LOGICAL FLOW OF ONE CHAT TURN
+# -----------------------------
+# User types in chat, clicks Send.
+#   -> send_btn.click fires process_message(...)
+#      -> if loaded_context is set, prepend it to user_message
+#      -> backend = BACKENDS[mode]
+#      -> client = backend.get_client(api_key)
+#      -> result = backend.run(client, effective_message)
+#         -> returns {reply, steps, extracted}
+#      -> build_outputs() produces table / chart / code / extracted JSON
+#         -> calls backend.build_code_snippets(...) for the Code tab
+#      -> save_json_artifact() writes a timestamped run_*.json
+#      -> returns 8 values matching the chat_outputs list in ZONE 5
+#         1. new chat history      -> chatbot
+#         2. steps dataframe       -> Results > Table
+#         3. extracted JSON        -> Results > Extracted
+#         4. chart dataframe       -> Visuals
+#         5. code snippet          -> Results > Code
+#         6. downloads list        -> downloads_state
+#         7. downloads list (same) -> Downloads tab file list
+#         8. empty string          -> chat_input (clears it)
+#
+# DATA SOURCE LOADERS follow a shorter pattern:
+# User loads a URL / PDF / spreadsheet / ML examples -> saves JSON artifact,
+# appends to downloads, updates loaded_context_state for next chat turn.
+# Returns 5 values: preview, status, context, downloads_state, downloads_files.
+#
+# THE TWO RULES THAT WILL SAVE YOU PAIN
+# -------------------------------------
+#  1. Handler return order MUST match its wiring outputs list.
+#     Function returns N values -> outputs=[c1, c2, ..., cN] must have N items
+#     in the same order. Mismatch is the #1 source of silent breakage.
+#
+#  2. All chat handlers (process_message, submit_form, new_chat) share
+#     the same chat_outputs list. If you change the shape of one, change
+#     all three at once.
+#
+# WHERE TO ADD NEW THINGS
+# -----------------------
+#  New backend          -> Create agent_<name>.py with the 4 contract symbols,
+#                          add one import line in ZONE 2, add it to BACKENDS.
+#                          Nothing else changes.
+#
+#  New top-level tab    -> ZONE 4 inside outer gr.Tabs()
+#                          + handler in ZONE 3
+#                          + wiring in ZONE 5
+#
+#  New sub-tab          -> ZONE 4 inside the parent tab's inner gr.Tabs()
+#                          + handler in ZONE 3 following scrape_url pattern
+#                          + wiring in ZONE 5 following scrape_btn pattern
+#
+#  New output display   -> ZONE 4 component + expand build_outputs in ZONE 2
+#                          + add to chat_outputs list
+#                          + update process_message, submit_form, new_chat
+#                            to return one more value in the matching position
+#
+#  New data source      -> Same as sub-tab. Always call save_json_artifact()
+#                          and always return the 5-tuple shape.
+#
+#  New agent tool       -> Edit tools.py only. Add function to TOOL_FUNCTIONS
+#                          dict and schema to TOOL_SCHEMAS list. The raw-SDK
+#                          backends pick it up automatically. For LangChain
+#                          and LangGraph, also wrap it with @lc_tool in
+#                          agent_langchain.py and (if math/info scoped) add
+#                          to MATH_TOOLS or INFO_TOOLS in agent_langgraph.py.
+#
+#  New field in an      -> Find the `artifact = {...}` dict in the relevant
+#  existing JSON           handler in ZONE 3 and add your key.
+#
+# ============================================================================
+
+
+# ============================================================================
+# ZONE 1 — Imports & constants
+# ============================================================================
+import os
+import json
+from datetime import datetime
+
+import gradio as gr
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from pypdf import PdfReader
+
+
+MAX_CONTEXT_CHARS = 5000
+
+
+# ============================================================================
+# ZONE 2 — Helpers (pure functions, no UI knowledge)
+# ============================================================================
+# These functions take plain Python inputs and return plain Python outputs.
+# They know nothing about Gradio. Reusable and testable on their own.
+#
+# NOTE: the actual LLM orchestration (the runners, the client, and the
+# code snippet builder) lives in the agent_*.py backend modules so that
+# implementations (raw SDK, LangChain, LangGraph, etc.) can be swapped
+# without touching this file. We just import each module and register
+# it in the BACKENDS dict below.
+# ----------------------------------------------------------------
+# Agent backends — each file is an independent import.
+# ALL backend imports are wrapped in try/except so the app boots even
+# if one file is broken (missing dep, version conflict, import error).
+# Broken backends are left out of the mode radio at startup and a
+# warning is printed to the console. If no backend loads, the app still
+# starts, but the mode list will be empty.
+# ----------------------------------------------------------------
+# Registry of loaded backends: BACKEND_NAME -> imported backend module.
+# Each import below is attempted independently; on failure the error is
+# printed to the console and that backend is simply not registered.
+BACKENDS = {}
+
+# Ringmaster is listed FIRST so it becomes the default selection
+# (dicts keep insertion order; presumably the UI defaults to the first
+# key — verify against the radio definition in ZONE 4).
+try:
+    import agent_langgraph_ringmaster
+    BACKENDS[agent_langgraph_ringmaster.BACKEND_NAME] = agent_langgraph_ringmaster
+except Exception as _rm_err:
+    print(f"[app.py] LangGraph Ringmaster backend unavailable: {_rm_err}")
+
+try:
+    import agent_workflow
+    BACKENDS[agent_workflow.BACKEND_NAME] = agent_workflow
+except Exception as _wf_err:
+    print(f"[app.py] Workflow backend unavailable: {_wf_err}")
+
+try:
+    import agent_py
+    BACKENDS[agent_py.BACKEND_NAME] = agent_py
+except Exception as _py_err:
+    print(f"[app.py] Simple Python Agent backend unavailable: {_py_err}")
+
+try:
+    import agent_langchain
+    BACKENDS[agent_langchain.BACKEND_NAME] = agent_langchain
+except Exception as _lc_err:
+    print(f"[app.py] LangChain backend unavailable: {_lc_err}")
+
+try:
+    import agent_langgraph
+    BACKENDS[agent_langgraph.BACKEND_NAME] = agent_langgraph
+except Exception as _lg_err:
+    print(f"[app.py] LangGraph backend unavailable: {_lg_err}")
+
+try:
+    import agent_smolagents
+    BACKENDS[agent_smolagents.BACKEND_NAME] = agent_smolagents
+except Exception as _sa_err:
+    print(f"[app.py] smolagents backend unavailable: {_sa_err}")
+
+try:
+    import agent_crewai
+    BACKENDS[agent_crewai.BACKEND_NAME] = agent_crewai
+except Exception as _crew_err:
+    print(f"[app.py] CrewAI backend unavailable: {_crew_err}")
+
+try:
+    import agent_llama_index
+    BACKENDS[agent_llama_index.BACKEND_NAME] = agent_llama_index
+except Exception as _li_err:
+    print(f"[app.py] LlamaIndex backend unavailable: {_li_err}")
+
+# No fallback backend is registered here: an empty BACKENDS dict only
+# triggers this console warning, and execution continues.
+if not BACKENDS:
+    print("[app.py] WARNING: no backends loaded. Check build logs.")
+
+from examples import ML_EXAMPLES
+from training_data import TRAINING_EXAMPLES
+from training import (
+    train_classifier, predict as classifier_predict,
+    cluster_hierarchical, cluster_report,
+)
+
+try:
+    import vectorstore
+    VECTORSTORE_OK = True
+except Exception as _vs_err:
+    print(f"[app.py] vectorstore unavailable: {_vs_err}")
+    VECTORSTORE_OK = False
+
+import providers
+
+# Workbench packages — each is a self-contained LangGraph supervisor workflow.
+# Wrapped so a broken workbench does not kill the whole app on cold boot.
+# ============================================================================
+# !!! RULE_VIOLATION_6 — DELIBERATE — see COMPLIANCE.md !!!
+# ----------------------------------------------------------------------------
+# Pattern:  try/except around module imports + WB_*_OK flags + print fallback.
+# Reason:   A broken workbench folder (wrong upload, missing __init__, syntax
+#           slip after an edit) must NOT bring down the entire Space on cold
+#           boot. Defensive import lets the seven-backend chat, Supervised ML,
+#           Unsupervised ML, and Vector Processing tabs keep working even if
+#           one workbench is broken.
+# Fix-when: Never. This is the one boundary where graceful degradation is
+#           worth more than strict compliance. Alternative would be pinning
+#           every workbench dependency exhaustively — brittle on HF Spaces.
+# ============================================================================
+# Each guarded import below sets an *_OK flag plus an error string
+# (None on success) and prints the failure reason to the console.
+
+# Grounded Theory workbench, aliased as wb_cgt.
+try:
+    import workbench_grounded_theory as wb_cgt
+    WB_CGT_OK = True
+    _wb_cgt_err = None
+except Exception as _e:
+    WB_CGT_OK = False
+    _wb_cgt_err = str(_e)
+    print(f"[app.py] workbench_grounded_theory unavailable: {_wb_cgt_err}")
+
+# Thematic Analysis workbench, aliased as wb_cta.
+try:
+    import workbench_thematic_analysis as wb_cta
+    WB_CTA_OK = True
+    _wb_cta_err = None
+except Exception as _e:
+    WB_CTA_OK = False
+    _wb_cta_err = str(_e)
+    print(f"[app.py] workbench_thematic_analysis unavailable: {_wb_cta_err}")
+
+# phase2_agent is imported separately from the thematic-analysis package,
+# so it has its own flag independent of WB_CTA_OK.
+try:
+    from workbench_thematic_analysis import phase2_agent
+    PHASE2_AGENT_OK = True
+    _phase2_agent_err = None
+except Exception as _e:
+    PHASE2_AGENT_OK = False
+    _phase2_agent_err = str(_e)
+    print(f"[app.py] phase2_agent unavailable: {_phase2_agent_err}")
+
+# Phase 3 entry point comes from the top-level phase3_themes module.
+try:
+    from phase3_themes import run_phase3_searching_themes
+    PHASE3_OK = True
+    _phase3_err = None
+except Exception as _e:
+    PHASE3_OK = False
+    _phase3_err = str(_e)
+    print(f"[app.py] phase3_themes unavailable: {_phase3_err}")
+
+
+# ----------------------------------------------------------------
+# Artifact writer — every input/run becomes a timestamped JSON file
+# ----------------------------------------------------------------
+def save_json_artifact(data, prefix):
+    """Write ``data`` to a timestamped JSON file and return its path.
+
+    The file is named ``<prefix>_<YYYYmmdd_HHMMSS_mmm>.json`` (millisecond
+    resolution). ``prefix`` is used verbatim, so the file lands in the
+    current working directory unless ``prefix`` contains a directory part.
+
+    Args:
+        data: Structure to serialize. Values the json module cannot encode
+            natively are stringified via ``default=str``.
+        prefix: Leading part of the file name, e.g. ``"scrape"``.
+
+    Returns:
+        The path of the file that was written.
+    """
+    # %f yields microseconds; dropping the last three digits keeps milliseconds.
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
+    path = f"{prefix}_{ts}.json"
+    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
+    # opened as UTF-8 explicitly; a non-UTF-8 platform default encoding would
+    # raise UnicodeEncodeError on such text.
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, default=str, ensure_ascii=False)
+    return path
+
+
+# ----------------------------------------------------------------
+# Build outputs for the Results/Visuals tabs from a run result
+# ----------------------------------------------------------------
+def build_outputs(user_message, mode, result):
+    """Turn one backend run result into the values shown in the result tabs.
+
+    Args:
+        user_message: The message that was sent to the backend; forwarded
+            to the backend's ``build_code_snippets``.
+        mode: Key into ``BACKENDS`` identifying the backend that ran.
+        result: Dict with at least ``"steps"`` (list of dicts, each with a
+            ``"tool"`` key) and ``"extracted"``.
+
+    Returns:
+        ``(steps_df, extracted_json, chart_df, code_snippet)``: the steps as
+        a DataFrame, ``extracted`` as pretty-printed JSON text, a tool/count
+        DataFrame, and the backend's code snippet (or an "Unknown backend"
+        comment when ``mode`` is not registered).
+    """
+    steps_df = pd.DataFrame(result["steps"])
+    # default=str mirrors save_json_artifact: a value json cannot encode
+    # natively is stringified instead of aborting the whole chat turn.
+    extracted_json = json.dumps(result["extracted"], indent=2, default=str)
+
+    # Count how many steps used each tool, in first-seen order.
+    tool_counts = {}
+    for s in result["steps"]:
+        tool_counts[s["tool"]] = tool_counts.get(s["tool"], 0) + 1
+    if tool_counts:
+        chart_df = pd.DataFrame(
+            [{"tool": k, "count": v} for k, v in tool_counts.items()]
+        )
+    else:
+        # Placeholder row so the chart always receives the same columns.
+        chart_df = pd.DataFrame([{"tool": "(none)", "count": 0}])
+
+    # Each backend has its own build_code_snippets — pick the right one.
+    backend = BACKENDS.get(mode)
+    if backend is not None:
+        code_snippet = backend.build_code_snippets(user_message, result["steps"])
+    else:
+        code_snippet = f"# Unknown backend: {mode}"
+    return steps_df, extracted_json, chart_df, code_snippet
+
+
+# ============================================================================
+# ZONE 3 — Action handlers (wired to UI buttons in Zone 5)
+# ============================================================================
+# These are the functions Gradio calls when a button is clicked or a form
+# is submitted. They read state, call Zone 2 helpers, and return values
+# that go directly into UI components.
+#
+# CONVENTIONS:
+#   - Data source loaders return 5 values:
+#       (preview, status, loaded_context, downloads_state, downloads_files)
+#   - Chat handlers (process_message, submit_form, new_chat) return 8 values:
+#       (chat_history, table_df, extracted_json, chart_df, code_snippet,
+#        downloads_state, downloads_files, empty_string_to_clear_input)
+#   - Clear handlers return only the fields they reset. Never touch downloads.
+#
+# ----------------------------------------------------------------
+# Data source loaders
+# Each returns: preview, status, loaded_context, downloads_state, downloads_files
+# Each saves a timestamped JSON artifact and appends to the downloads list.
+# ----------------------------------------------------------------
+def scrape_url(url, downloads_list):
+    """Fetch a web page, reduce it to visible text, and save it as an artifact.
+
+    Args:
+        url: Address to fetch. Blank or missing input loads nothing.
+        downloads_list: Current downloads list; copied, never mutated.
+
+    Returns:
+        ``(preview, status, loaded_context, downloads_state, downloads_files)``.
+        On success preview and context are the page text truncated to
+        ``MAX_CONTEXT_CHARS`` and the new artifact path is appended to the
+        downloads. On blank input or a failed request the text fields are
+        empty, the status explains why, and the downloads are unchanged.
+    """
+    dl = list(downloads_list or [])
+    target = (url or "").strip()
+    if not target:
+        return "", "Nothing loaded.", "", dl, dl
+
+    try:
+        resp = requests.get(target, timeout=15)
+        # Without this check a 404/500 error page would be loaded as content.
+        resp.raise_for_status()
+    except requests.RequestException as err:
+        # Covers malformed URLs, connection errors, timeouts and HTTP errors;
+        # report in the status field instead of crashing the handler.
+        return "", f"**Load failed:** {target} — {err}", "", dl, dl
+
+    soup = BeautifulSoup(resp.text, "html.parser")
+    # Drop non-visible content before extracting text.
+    for tag in soup(["script", "style", "noscript"]):
+        tag.decompose()
+    text = soup.get_text(separator=" ", strip=True)[:MAX_CONTEXT_CHARS]
+    status = f"**Loaded:** {target} — {len(text)} chars"
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "web_scrape",
+        "url": target,
+        "char_count": len(text),
+        "content": text,
+    }
+    path = save_json_artifact(artifact, "scrape")
+    dl.append(path)
+    return text, status, text, dl, dl
+
+
+def extract_pdf(file_obj, downloads_list):
+    """Extract text from an uploaded PDF and save it as a JSON artifact.
+
+    Args:
+        file_obj: Upload object exposing the file path as ``.name``, or
+            ``None`` when nothing was uploaded.
+        downloads_list: Current downloads list; copied, never mutated.
+
+    Returns:
+        ``(preview, status, loaded_context, downloads_state, downloads_files)``
+        where preview and context are the same truncated text.
+    """
+    downloads = list(downloads_list or [])
+    if file_obj is None:
+        return "", "Nothing loaded.", "", downloads, downloads
+
+    reader = PdfReader(file_obj.name)
+    # Pages with no extractable text contribute an empty string.
+    page_texts = []
+    for page in reader.pages:
+        page_texts.append(page.extract_text() or "")
+    content = "\n".join(page_texts)[:MAX_CONTEXT_CHARS]
+    n_pages = len(reader.pages)
+    status = f"**Loaded:** PDF with {n_pages} pages — {len(content)} chars"
+
+    artifact_path = save_json_artifact(
+        {
+            "timestamp": datetime.now().isoformat(),
+            "source_type": "pdf_upload",
+            "filename": os.path.basename(file_obj.name),
+            "page_count": n_pages,
+            "char_count": len(content),
+            "content": content,
+        },
+        "pdf",
+    )
+    downloads.append(artifact_path)
+    return content, status, content, downloads, downloads
+
+
+def load_spreadsheet(file_obj, downloads_list):
+    """Load an uploaded CSV/Excel file and save a JSON artifact of it.
+
+    Args:
+        file_obj: Upload object exposing the file path as ``.name``, or
+            ``None`` when nothing was uploaded. Paths ending in ``.csv``
+            (case-insensitive) go through ``pd.read_csv``; everything else
+            is handed to ``pd.read_excel``.
+        downloads_list: Current downloads list; copied, never mutated.
+
+    Returns:
+        ``(preview_df, status, loaded_context, downloads_state,
+        downloads_files)``: the first 20 rows as a DataFrame, a status line,
+        and the first 50 rows rendered as text (truncated to
+        ``MAX_CONTEXT_CHARS``) as context.
+    """
+    dl = list(downloads_list or [])
+    if file_obj is None:
+        return pd.DataFrame(), "Nothing loaded.", "", dl, dl
+
+    path_in = file_obj.name
+    if path_in.lower().endswith(".csv"):
+        df = pd.read_csv(path_in)
+    else:
+        df = pd.read_excel(path_in)
+    preview_df = df.head(20)
+    text = df.head(50).to_string()[:MAX_CONTEXT_CHARS]
+    status = f"**Loaded:** {len(df)} rows x {len(df.columns)} columns"
+
+    # Missing cells (NaN/NaT) would be written as bare ``NaN``, which is not
+    # valid JSON; map them to None so the artifact contains ``null`` instead.
+    sample = df.head(100)
+    rows = sample.astype(object).where(sample.notna(), None).to_dict(orient="records")
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "spreadsheet_upload",
+        "filename": os.path.basename(path_in),
+        "row_count": int(len(df)),
+        "column_count": int(len(df.columns)),
+        "columns": list(df.columns),
+        "rows": rows,
+    }
+    path_out = save_json_artifact(artifact, "spreadsheet")
+    dl.append(path_out)
+    return preview_df, status, text, dl, dl
+
+
+def load_ml_examples(downloads_list):
+    """Use the built-in ML_EXAMPLES catalog as chat context (no upload).
+
+    Returns ``(preview, status, context_text, downloads_state,
+    downloads_files)``. The preview lists the first eight sentences (each
+    cut at 90 characters); the context is the catalog as JSON, truncated to
+    ``MAX_CONTEXT_CHARS``. The full catalog is saved as an artifact.
+    """
+    downloads = list(downloads_list or [])
+    total = len(ML_EXAMPLES)
+    n_papers = len({ex["paper_id"] for ex in ML_EXAMPLES})
+
+    shown = []
+    for ex in ML_EXAMPLES[:8]:
+        label = ex["label"]
+        sentence = ex["sentence"]
+        tail = "..." if len(sentence) > 90 else ""
+        shown.append(
+            f"[{label}] {sentence[:90]}{tail}"
+            f"  — {ex['paper_title']}, {ex['year']}"
+        )
+    shown.append(f"\n... and {max(0, total - 8)} more sentences")
+    preview = "\n".join(shown)
+
+    status = f"**Loaded:** {total} labeled sentences from {n_papers} ML papers"
+    catalog_json = json.dumps(ML_EXAMPLES, indent=2, ensure_ascii=False)
+    context_text = catalog_json[:MAX_CONTEXT_CHARS]
+
+    artifact_path = save_json_artifact(
+        {
+            "timestamp": datetime.now().isoformat(),
+            "source_type": "ml_examples_catalog",
+            "sentence_count": total,
+            "paper_count": n_papers,
+            "examples": ML_EXAMPLES,
+        },
+        "ml_examples",
+    )
+    downloads.append(artifact_path)
+    return preview, status, context_text, downloads, downloads
+
+
+# ----------------------------------------------------------------
+# Clear handlers — reset only the source-specific fields
+# ----------------------------------------------------------------
+def clear_scrape():
+    """Return the reset values for the web-scrape panel.
+
+    A 4-tuple: two empty strings, the "Nothing loaded." status text, and a
+    final empty string. The receiving components are set by the wiring.
+    """
+    blank = ""
+    return (blank, blank, "Nothing loaded.", blank)
+
+
+def clear_pdf():
+    """Return the reset values for the PDF panel.
+
+    A 4-tuple: ``None``, an empty string, the "Nothing loaded." status
+    text, and a final empty string.
+    """
+    blank = ""
+    return (None, blank, "Nothing loaded.", blank)
+
+
+def clear_spreadsheet():
+    """Return the reset values for the spreadsheet panel.
+
+    A 4-tuple: ``None``, an empty DataFrame, the "Nothing loaded." status
+    text, and an empty string.
+    """
+    empty_table = pd.DataFrame()
+    return (None, empty_table, "Nothing loaded.", "")
+
+
+def clear_ml_examples():
+    """Return the reset values for the ML-examples panel.
+
+    A 3-tuple: an empty string, the "Nothing loaded." status text, and an
+    empty string.
+    """
+    blank = ""
+    return (blank, "Nothing loaded.", blank)
+
+
+# ----------------------------------------------------------------
+# Training handlers — supervised and unsupervised ML on TRAINING_EXAMPLES
+# ----------------------------------------------------------------
+def handle_train(downloads_list):
+    """Train the classifier via ``train_classifier`` and record the outcome.
+
+    Returns ``(trained, status, cm_df, downloads_state, downloads_files)``:
+    the trained object, a markdown status with accuracy and split sizes,
+    the confusion matrix as a labelled DataFrame, and the downloads list
+    (twice) with the new artifact appended.
+    """
+    downloads = list(downloads_list or [])
+    model = train_classifier()
+
+    # Confusion matrix for display: one "pred:<label>" column per class,
+    # preceded by an "actual" column holding the row labels.
+    pred_columns = [f"pred:{name}" for name in model.labels]
+    cm_df = pd.DataFrame(model.confusion, columns=pred_columns)
+    cm_df.insert(0, "actual", model.labels)
+
+    status = "".join([
+        f"**Accuracy:** {model.accuracy:.1%}  \n",
+        f"**Train size:** {model.train_size}, ",
+        f"**Test size:** {model.test_size}",
+    ])
+
+    artifact_path = save_json_artifact(
+        {
+            "timestamp": datetime.now().isoformat(),
+            "source_type": "supervised_training",
+            "accuracy": model.accuracy,
+            "train_size": model.train_size,
+            "test_size": model.test_size,
+            "labels": model.labels,
+            "confusion_matrix": model.confusion,
+        },
+        "training",
+    )
+    downloads.append(artifact_path)
+
+    return model, status, cm_df, downloads, downloads
+
+
+def handle_predict(trained, sentence, downloads_list):
+    """Classify one sentence with an already trained model.
+
+    Returns ``(markdown, downloads_state, downloads_files)``. When no model
+    is available or the sentence is blank, the markdown is a short hint and
+    no artifact is written.
+    """
+    downloads = list(downloads_list or [])
+    if trained is None:
+        return "Train the classifier first.", downloads, downloads
+    text = sentence.strip() if sentence else ""
+    if not text:
+        return "Enter a sentence to predict.", downloads, downloads
+
+    result = classifier_predict(trained, text)
+
+    report = [
+        f"**Predicted label:** `{result['predicted_label']}`",
+        f"**Confidence:** {result['confidence']:.1%}",
+        "",
+        "**Class probabilities:**",
+    ]
+    # Highest probability first.
+    ranked = sorted(result["probabilities"].items(), key=lambda item: -item[1])
+    report.extend(f"- `{name}`: {prob:.1%}" for name, prob in ranked)
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "supervised_prediction",
+    }
+    artifact.update(result)
+    downloads.append(save_json_artifact(artifact, "prediction"))
+
+    return "\n".join(report), downloads, downloads
+
+
+def handle_cluster(similarity_threshold, min_cluster_size, n_nearest,
+                   enable_llm_labels, llm_provider, llm_key, downloads_list):
+    """Cluster the TRAINING_EXAMPLES sentences, optionally LLM-labelling clusters.
+
+    Calls ``training.cluster_with_params`` and reads these keys from its
+    result: ``cluster_ids`` (one per sentence, -1 = noise),
+    ``representatives`` (cluster id -> list of ``(sentence_index, _)``
+    pairs), ``distances_to_centroid``, ``n_clusters_found`` and
+    ``n_noise_points``. When labelling is enabled and at least one cluster
+    was found, each cluster's representative sentences are sent to the LLM
+    with a prompt asking for a short label.
+
+    Args:
+        similarity_threshold: Coerced with ``float()``.
+        min_cluster_size: Coerced with ``int()``.
+        n_nearest: Coerced with ``int()``.
+        enable_llm_labels: Whether to request LLM labels.
+        llm_provider: Provider name passed to the ``providers`` module.
+        llm_key: API key passed to ``providers.get_llm_client``.
+        downloads_list: Current downloads list; copied, never mutated.
+
+    Returns:
+        ``(sent_df, status, downloads_state, downloads_files)``.
+    """
+    from training import cluster_with_params as _cwp
+
+    dl = list(downloads_list or [])
+
+    sentences = [e["sentence"] for e in TRAINING_EXAMPLES]
+    true_labels = [e["label"] for e in TRAINING_EXAMPLES]
+
+    result = _cwp(
+        sentences,
+        similarity_threshold=float(similarity_threshold),
+        min_cluster_size=int(min_cluster_size),
+        n_nearest=int(n_nearest),
+    )
+
+    cluster_ids = result["cluster_ids"]
+    representatives = result["representatives"]
+    distances = result["distances_to_centroid"]
+
+    # Build LLM labels if enabled
+    llm_labels = {}
+    llm_error = None
+    if enable_llm_labels and result["n_clusters_found"] > 0:
+        # Best-effort: any failure is reported in the status line and the
+        # clustering result is still returned (labels gathered so far stay).
+        try:
+            client = providers.get_llm_client(llm_provider, llm_key)
+            model_name = providers.get_llm_model(llm_provider)
+            for cid, reps in representatives.items():
+                rep_sentences = [sentences[i] for i, _d in reps]
+                numbered = "\n".join(
+                    f"{k+1}. {s}" for k, s in enumerate(rep_sentences)
+                )
+                prompt = (
+                    f"The following {len(rep_sentences)} sentences were grouped "
+                    f"together by a clustering algorithm. Based ONLY on these "
+                    f"sentences, produce a short label (2-5 words) that describes "
+                    f"what they have in common. Output ONLY the label, nothing else.\n\n"
+                    f"{numbered}\n\nLabel:"
+                )
+                resp = client.chat.complete(
+                    model=model_name,
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0.2,
+                    max_tokens=40,
+                )
+                label = (resp.choices[0].message.content or "").strip()
+                # Trim to first line, cap length
+                label = label.split("\n")[0][:60]
+                llm_labels[cid] = label
+        except Exception as e:
+            llm_error = str(e)
+
+    # Build sentence-level dataframe
+    sent_rows = []
+    for idx, sent in enumerate(sentences):
+        cid = cluster_ids[idx]
+        rep_idxs = {i for i, _d in representatives.get(cid, [])}
+        sent_rows.append({
+            "idx": idx,
+            "sentence": sent,
+            "true_label": true_labels[idx],
+            "cluster_id": "noise" if cid == -1 else str(cid),
+            "cluster_label": llm_labels.get(cid, "") if cid != -1 else "",
+            "is_representative": idx in rep_idxs,
+            "dist_to_centroid": (
+                round(distances[idx], 4) if distances[idx] is not None else None
+            ),
+        })
+    sent_df = pd.DataFrame(sent_rows)
+
+    n_found = result["n_clusters_found"]
+    n_noise = result["n_noise_points"]
+    status_parts = [
+        f"**Similarity >= {float(similarity_threshold):.2f}**, "
+        f"**min size = {int(min_cluster_size)}**, "
+        f"**N nearest = {int(n_nearest)}**",
+        f"**Found:** {n_found} cluster(s), **Noise:** {n_noise} sentence(s)",
+    ]
+    if enable_llm_labels:
+        if llm_error:
+            status_parts.append(f"**LLM labeling failed:** {llm_error}")
+        elif llm_labels:
+            status_parts.append(f"**LLM labels generated** via {llm_provider}")
+        else:
+            # No LLM call produced a label (e.g. zero clusters found), so do
+            # not claim that labels were generated.
+            status_parts.append("**LLM labeling skipped:** nothing to label")
+    status = "  \n".join(status_parts)
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "unsupervised_clustering_parameterized",
+        "algorithm": "Hierarchical Agglomerative",
+        "similarity_threshold": float(similarity_threshold),
+        "min_cluster_size": int(min_cluster_size),
+        "n_nearest": int(n_nearest),
+        "n_clusters_found": n_found,
+        "n_noise_points": n_noise,
+        "llm_provider": llm_provider if enable_llm_labels else None,
+        # JSON object keys must be strings.
+        "llm_labels": {str(k): v for k, v in llm_labels.items()},
+        "sentences": sent_rows,
+    }
+    path = save_json_artifact(artifact, "clusters_params")
+    dl.append(path)
+
+    return sent_df, status, dl, dl
+
+
+# ----------------------------------------------------------------
+# Workbench handlers — Grounded Theory (Nelson 2020) + Thematic Analysis
+# ----------------------------------------------------------------
+def handle_wb_cgt(user_message, similarity_threshold, min_cluster_size,
+                  n_nearest, llm_provider, llm_key, loaded_context, downloads_list):
+    """Run the Computational Grounded Theory workbench (Nelson 2020).
+
+    Sentences come from ``loaded_context`` (one per non-blank line, labelled
+    "(unknown)") when it has content; otherwise from the built-in
+    TRAINING_EXAMPLES corpus with its own labels.
+
+    Returns ``(trace_df, reply_md, sentences_df, downloads_state,
+    downloads_files)``. If the workbench failed to import, the frames are
+    empty and ``reply_md`` carries the import error.
+    """
+    downloads = list(downloads_list or [])
+
+    # !!! RULE_VIOLATION_7 — DELIBERATE — see COMPLIANCE.md !!!
+    if not WB_CGT_OK:
+        unavailable_md = "# Workbench unavailable\n\n" + (_wb_cgt_err or "unknown error")
+        return pd.DataFrame(), unavailable_md, pd.DataFrame(), downloads, downloads
+
+    # ---- Resolve sentence source ----
+    has_upload = bool(loaded_context) and bool(loaded_context.strip())
+    if not has_upload:
+        from training_data import TRAINING_EXAMPLES as demo_examples
+        sentences = [row["sentence"] for row in demo_examples]
+        true_labels = [row["label"] for row in demo_examples]
+        data_source = "demo"
+    else:
+        stripped_lines = (line.strip() for line in loaded_context.split("\n"))
+        sentences = [line for line in stripped_lines if line]
+        true_labels = ["(unknown)"] * len(sentences)
+        data_source = "uploaded"
+
+    # Coerce the numeric controls once; reused for the run and the artifact.
+    cluster_params = {
+        "similarity_threshold": float(similarity_threshold),
+        "min_cluster_size": int(min_cluster_size),
+        "n_nearest": int(n_nearest),
+    }
+
+    result = wb_cgt.run(
+        user_message=user_message or "Run computational grounded theory on the training data.",
+        sentences=sentences,
+        true_labels=true_labels,
+        data_source=data_source,
+        **cluster_params,
+        llm_provider=llm_provider,
+        llm_key=llm_key,
+    )
+
+    trace_df = pd.DataFrame(result.get("steps") or [])
+    reply_md = (
+        "## Supervisor reply\n\n"
+        + (result.get("reply") or "(empty)")
+        + f"\n\n*Data source: **{data_source}** ({len(sentences)} sentences)*"
+    )
+
+    detection = result.get("detection_result") or {}
+    sentence_rows = detection.get("sentence_rows") or []
+    if sentence_rows:
+        sentences_df = pd.DataFrame(sentence_rows)
+    else:
+        sentences_df = pd.DataFrame()
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "workbench_cgt",
+        "paper": "Nelson 2020 - Computational Grounded Theory",
+        "data_source": data_source,
+        "n_sentences": len(sentences),
+        "parameters": {**cluster_params, "llm_provider": llm_provider},
+    }
+    # Copy the run outputs into the artifact; absent keys become None.
+    for key in ("reply", "steps", "detection_result",
+                "refinement_result", "confirmation_result"):
+        artifact[key] = result.get(key)
+    downloads.append(save_json_artifact(artifact, "workbench_cgt"))
+
+    return trace_df, reply_md, sentences_df, downloads, downloads
+
+
+def handle_wb_cta(user_message, max_sentences, llm_provider, llm_key,
+                  loaded_context, downloads_list):
+    """Run the Computational Thematic Analysis workbench (Braun & Clarke 2006).
+
+    Sentences come from ``loaded_context`` (one per non-blank line, labelled
+    "(unknown)") when it has content; otherwise from the built-in
+    TRAINING_EXAMPLES corpus with its own labels.
+
+    Returns ``(trace_df, reply_md, codes_df, downloads_state,
+    downloads_files)``. If the workbench failed to import, the frames are
+    empty and ``reply_md`` carries the import error.
+    """
+    downloads = list(downloads_list or [])
+
+    # !!! RULE_VIOLATION_7 — DELIBERATE — see COMPLIANCE.md !!!
+    # Pairs with RULE_VIOLATION_6: handles a cold-boot import failure.
+    if not WB_CTA_OK:
+        unavailable_md = "# Workbench unavailable\n\n" + (_wb_cta_err or "unknown error")
+        return pd.DataFrame(), unavailable_md, pd.DataFrame(), downloads, downloads
+
+    # ---- Resolve sentence source ----
+    has_upload = bool(loaded_context) and bool(loaded_context.strip())
+    if not has_upload:
+        from training_data import TRAINING_EXAMPLES as demo_examples
+        sentences = [row["sentence"] for row in demo_examples]
+        true_labels = [row["label"] for row in demo_examples]
+        data_source = "demo"
+    else:
+        stripped_lines = (line.strip() for line in loaded_context.split("\n"))
+        sentences = [line for line in stripped_lines if line]
+        true_labels = ["(unknown)"] * len(sentences)
+        data_source = "uploaded"
+
+    # Coerce once; reused for the run and the artifact.
+    sentence_cap = int(max_sentences)
+
+    result = wb_cta.run(
+        user_message=user_message or "Run reflexive thematic analysis on the training data.",
+        sentences=sentences,
+        true_labels=true_labels,
+        data_source=data_source,
+        max_sentences_to_code=sentence_cap,
+        llm_provider=llm_provider,
+        llm_key=llm_key,
+    )
+
+    trace_df = pd.DataFrame(result.get("steps") or [])
+    reply_md = (
+        "## Supervisor reply\n\n"
+        + (result.get("reply") or "(empty)")
+        + f"\n\n*Data source: **{data_source}** ({len(sentences)} sentences)*"
+    )
+
+    phase2 = result.get("phase2_initial_codes") or {}
+    coded_rows = phase2.get("coded_rows") or []
+    if coded_rows:
+        codes_df = pd.DataFrame(coded_rows)
+    else:
+        codes_df = pd.DataFrame()
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "workbench_cta",
+        "paper": "Braun & Clarke 2006 - Reflexive Thematic Analysis",
+        "data_source": data_source,
+        "n_sentences": len(sentences),
+        "parameters": {
+            "max_sentences_to_code": sentence_cap,
+            "llm_provider": llm_provider,
+        },
+    }
+    # Copy the run outputs into the artifact; absent keys become None.
+    for key in (
+        "reply",
+        "steps",
+        "phase1_familiarization",
+        "phase2_initial_codes",
+        "phase3_searching_themes",
+        "phase4_reviewing_themes",
+        "phase5_defining_naming",
+        "phase6_producing_report",
+    ):
+        artifact[key] = result.get(key)
+    downloads.append(save_json_artifact(artifact, "workbench_cta"))
+
+    return trace_df, reply_md, codes_df, downloads, downloads
+
+
+def clear_training():
+    return None, "Not trained yet.", pd.DataFrame(), ""
+
+
+def clear_clustering():
+    return pd.DataFrame(), "Not clustered yet."
+
+
+def filter_training_dataset(label):
+    """Filter the training-data dataframe shown in the Supervised Dataset sub-tab."""
+    if label == "(all)" or not label:
+        return pd.DataFrame(TRAINING_EXAMPLES)
+    return pd.DataFrame([e for e in TRAINING_EXAMPLES if e["label"] == label])
+
+
+# ============================================================================
+# Phase 1 Familiarization handlers — Braun & Clarke 2006, Phase 1
+# ============================================================================
+# These handlers drive the Phase 1 — Familiarization sub-tab inside CTA.
+# The flow follows Braun & Clarke's active-reading protocol, implemented
+# through grounded dialogue partners (Gemini Gems + NotebookLM) plus
+# researcher confirmation:
+#   1. Load canonical corpus CSV (doc_id, doc_title, section, sub_section, sentence)
+#   2. Researcher runs Familiarization Facilitator dialogue in Gemini,
+#      pastes familiarization notes + transcript + source evidence back
+#   3. Researcher runs Reflexive Companion dialogue, pastes reflexive
+#      challenges + reflexive positioning + immersion coverage back
+#   4. Build researcher confirmation table joining corpus with noticings
+#   5. Researcher edits the table (confirm/refine/reject each noticing)
+#   6. Save to JSON artifact for Downloads tab
+# ----------------------------------------------------------------
+
+P1_REQUIRED_COLUMNS = ["doc_id", "doc_title", "section", "sub_section", "sentence"]
+
+
+def handle_p1_load_test_csv(downloads_list):
+    """Load the built-in test_phase1.csv for pipeline verification."""
+    dl = list(downloads_list or [])
+    try:
+        df = pd.read_csv("test_phase1.csv")
+    except Exception as e:
+        return (
+            [],
+            f"Failed to load test_phase1.csv: {e}",
+            pd.DataFrame(),
+            dl, dl,
+        )
+
+    missing = [c for c in P1_REQUIRED_COLUMNS if c not in df.columns]
+    if missing:
+        return (
+            [],
+            f"test_phase1.csv is missing required columns: {missing}",
+            pd.DataFrame(),
+            dl, dl,
+        )
+
+    corpus = df[P1_REQUIRED_COLUMNS].to_dict("records")
+    status = (
+        f"**Loaded test_phase1.csv** — {len(corpus)} sentences across "
+        f"{df['doc_id'].nunique()} documents, "
+        f"{df['section'].nunique()} unique sections."
+    )
+    return corpus, status, df[P1_REQUIRED_COLUMNS], dl, dl
+
+
+def handle_p1_upload_csv(file_obj, downloads_list):
+    """Load a user-uploaded canonical CSV."""
+    dl = list(downloads_list or [])
+    if file_obj is None:
+        return [], "No file uploaded.", pd.DataFrame(), dl, dl
+
+    try:
+        df = pd.read_csv(file_obj.name)
+    except Exception as e:
+        return [], f"Failed to read CSV: {e}", pd.DataFrame(), dl, dl
+
+    missing = [c for c in P1_REQUIRED_COLUMNS if c not in df.columns]
+    if missing:
+        return (
+            [],
+            f"Uploaded CSV is missing required columns: {missing}. "
+            f"Canonical schema is: {P1_REQUIRED_COLUMNS}",
+            pd.DataFrame(),
+            dl, dl,
+        )
+
+    corpus = df[P1_REQUIRED_COLUMNS].to_dict("records")
+    status = (
+        f"**Loaded uploaded CSV** — {len(corpus)} sentences across "
+        f"{df['doc_id'].nunique()} documents."
+    )
+    return corpus, status, df[P1_REQUIRED_COLUMNS], dl, dl
+
+
+def handle_p1_build_validation_table(
+    corpus,
+    facilitator_memo, facilitator_transcript, facilitator_citations,
+    companion_challenges, companion_reflexivity, companion_breadth,
+):
+    """Build the researcher confirmation table from corpus + pasted Phase 1 outputs.
+
+    Strategy: start with every corpus row (doc_id, doc_title, section,
+    sub_section, sentence), then append empty initial_noticing /
+    researcher_confirmation columns. The researcher edits the table inline
+    to attach initial noticings to specific sentences and mark each one
+    confirm/refine/reject.
+
+    This is the minimum viable version. A future round will parse the
+    pasted source evidence and auto-populate the initial_noticing column
+    for sentences that were explicitly quoted during the dialogue.
+    """
+    if not corpus:
+        empty = pd.DataFrame(columns=[
+            "doc_id", "doc_title", "section", "sub_section", "sentence",
+            "initial_noticing", "reflexive_challenge",
+            "researcher_confirmation", "refined_noticing",
+        ])
+        return empty
+
+    rows = []
+    for r in corpus:
+        rows.append({
+            "doc_id": r.get("doc_id", ""),
+            "doc_title": r.get("doc_title", ""),
+            "section": r.get("section", ""),
+            "sub_section": r.get("sub_section", ""),
+            "sentence": r.get("sentence", ""),
+            "initial_noticing": "",
+            "reflexive_challenge": "",
+            "researcher_confirmation": "",
+            "refined_noticing": "",
+        })
+    return pd.DataFrame(rows)
+
+
+def handle_p1_save(
+    corpus,
+    facilitator_memo, facilitator_transcript, facilitator_citations,
+    companion_challenges, companion_reflexivity, companion_breadth,
+    validation_table,
+    downloads_list,
+):
+    """Save all Phase 1 outputs as a timestamped JSON artifact."""
+    dl = list(downloads_list or [])
+
+    # Convert confirmation dataframe to list-of-dicts for JSON
+    if isinstance(validation_table, pd.DataFrame):
+        confirmation_rows = validation_table.fillna("").to_dict("records")
+    else:
+        confirmation_rows = []
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "phase1_familiarization",
+        "methodology": "Braun & Clarke 2006 Phase 1 — Familiarizing Yourself With Your Data",
+        "corpus_size": len(corpus or []),
+        "step1_familiarization_facilitator": {
+            "familiarization_notes": facilitator_memo or "",
+            "active_reading_transcript": facilitator_transcript or "",
+            "source_evidence": facilitator_citations or "",
+        },
+        "step2_reflexive_companion": {
+            "reflexive_challenges": companion_challenges or "",
+            "reflexive_positioning": companion_reflexivity or "",
+            "dataset_immersion_coverage": companion_breadth or "",
+        },
+        "step3_researcher_confirmation_table": confirmation_rows,
+    }
+    path = save_json_artifact(artifact, "phase1_familiarization")
+    dl.append(path)
+    status = (
+        f"**Saved Phase 1 familiarization output** — {len(corpus or [])} corpus sentences, "
+        f"{len(confirmation_rows)} confirmation rows. "
+        f"Artifact: `{path.split('/')[-1]}`"
+    )
+    return status, dl, dl
+
+
+# ============================================================================
+# Phase 2 Initial Coding handlers — Braun & Clarke 2006, Phase 2
+# ============================================================================
+# Round 1: scaffolding + data flow. Round 2 replaces placeholder agent with
+# real LangGraph supervisor. Round 3 adds iteration 2/3 + convergence.
+#
+# The agent architecture (Round 2) will have 7 tools:
+#   - read_corpus(filter)
+#   - read_phase1_context()
+#   - propose_code(sentence, semantic, latent)
+#   - check_codebook(code_name)
+#   - add_to_codebook(code_name, definition, example)
+#   - flag_for_review(sentence, reason)
+#   - save_iteration(n)
+# ----------------------------------------------------------------
+
+
+def handle_p2_refresh_corpus(
+    corpus,
+    facilitator_memo, companion_reflexivity, validation_table,
+):
+    """Refresh Phase 2 corpus status + Phase 1 context summary.
+
+    Phase 2 reads the corpus loaded in Phase 1 (shared state). It also
+    surfaces Phase 1's reflexive positioning and confirmed noticings as
+    context for the agent.
+    """
+    if not corpus:
+        return (
+            "**No corpus loaded.** Go to Phase 1 — Familiarization and load "
+            "test_phase1.csv (or your own canonical CSV) first.",
+            "*Phase 1 output will appear here after Save Phase 1.*",
+        )
+
+    # Count confirmed noticings from Phase 1 validation table
+    confirmed_count = 0
+    if isinstance(validation_table, pd.DataFrame) and not validation_table.empty:
+        noticings_col = validation_table.get("initial_noticing")
+        if noticings_col is not None:
+            confirmed_count = sum(
+                1 for v in noticings_col.fillna("").tolist() if str(v).strip()
+            )
+
+    n_docs = len({r.get("doc_id", "") for r in corpus})
+    corpus_status = (
+        f"**Corpus ready** — {len(corpus)} sentences across {n_docs} documents. "
+        f"Inherited from Phase 1 state."
+    )
+
+    p1_summary_parts = []
+    if facilitator_memo and facilitator_memo.strip():
+        preview = facilitator_memo.strip()[:300]
+        p1_summary_parts.append(f"**Familiarization notes:** {preview}...")
+    if companion_reflexivity and companion_reflexivity.strip():
+        preview = companion_reflexivity.strip()[:300]
+        p1_summary_parts.append(f"**Reflexive positioning:** {preview}...")
+    p1_summary_parts.append(
+        f"**Confirmed initial noticings:** {confirmed_count} rows with non-empty `initial_noticing`."
+    )
+    p1_summary = "\n\n".join(p1_summary_parts) if p1_summary_parts else (
+        "*Phase 1 output will appear here after Save Phase 1.*"
+    )
+
+    return corpus_status, p1_summary
+
+
+def handle_p2_run_iteration(
+    iteration_n, corpus,
+    existing_codes_table, existing_codebook_table,
+    facilitator_memo, companion_reflexivity, validation_table,
+    llm_provider, llm_key,
+    orientation,
+):
+    """Run one Phase 2 coding iteration via the real LangGraph agent.
+
+    Strict B&C 2006 Phase 2:
+    - Multiple codes per segment (1-5)
+    - Context window (2 before + 2 after)
+    - Researcher-chosen orientation (semantic OR latent, not both)
+    - Reflexive positioning injected into every code prompt
+    - Researcher override is final
+    """
+    # Empty corpus guard
+    if not corpus:
+        empty_codes = pd.DataFrame(columns=[
+            "doc_id", "doc_title", "section", "sub_section", "sentence",
+            "ai_code_iter1", "human_code_iter1",
+            "ai_code_iter2", "human_code_iter2",
+            "ai_code_iter3", "human_code_iter3",
+            "final_code", "flagged",
+        ])
+        empty_codebook = pd.DataFrame(columns=[
+            "code_name", "definition", "created_by", "provenance", "sentence_count",
+        ])
+        return (
+            empty_codes, empty_codebook,
+            "**Cannot run — no corpus loaded.** Load corpus in Phase 1 first.",
+        )
+
+    # Agent availability guard
+    if not PHASE2_AGENT_OK:
+        empty_codes = pd.DataFrame(columns=[
+            "doc_id", "doc_title", "section", "sub_section", "sentence",
+            "ai_code_iter1", "human_code_iter1",
+            "ai_code_iter2", "human_code_iter2",
+            "ai_code_iter3", "human_code_iter3",
+            "final_code", "flagged",
+        ])
+        empty_codebook = pd.DataFrame(columns=[
+            "code_name", "definition", "created_by", "provenance", "sentence_count",
+        ])
+        return (
+            empty_codes, empty_codebook,
+            f"**Phase 2 agent unavailable** — `{_phase2_agent_err}`",
+        )
+
+    # API key guard
+    if not llm_key or not str(llm_key).strip():
+        empty_codes = pd.DataFrame(columns=[
+            "doc_id", "doc_title", "section", "sub_section", "sentence",
+            "ai_code_iter1", "human_code_iter1",
+            "ai_code_iter2", "human_code_iter2",
+            "ai_code_iter3", "human_code_iter3",
+            "final_code", "flagged",
+        ])
+        empty_codebook = pd.DataFrame(columns=[
+            "code_name", "definition", "created_by", "provenance", "sentence_count",
+        ])
+        return (
+            empty_codes, empty_codebook,
+            "**Cannot run — Mistral API key is missing.** Paste it in the sidebar first.",
+        )
+
+    # Initialize the codes table (carry forward if it exists)
+    if isinstance(existing_codes_table, pd.DataFrame) and not existing_codes_table.empty:
+        codes_df = existing_codes_table.copy()
+    else:
+        rows = []
+        for r in corpus:
+            rows.append({
+                "doc_id": r.get("doc_id", ""),
+                "doc_title": r.get("doc_title", ""),
+                "section": r.get("section", ""),
+                "sub_section": r.get("sub_section", ""),
+                "sentence": r.get("sentence", ""),
+                "ai_code_iter1": "",
+                "human_code_iter1": "",
+                "ai_code_iter2": "",
+                "human_code_iter2": "",
+                "ai_code_iter3": "",
+                "human_code_iter3": "",
+                "final_code": "",
+                "flagged": "",
+            })
+        codes_df = pd.DataFrame(rows)
+
+    # Initialize codebook
+    if isinstance(existing_codebook_table, pd.DataFrame) and not existing_codebook_table.empty:
+        codebook_list = existing_codebook_table.fillna("").to_dict("records")
+    else:
+        codebook_list = []
+
+    # Build confirmed_noticings list from Phase 1 validation table
+    confirmed_noticings = []
+    if isinstance(validation_table, pd.DataFrame) and not validation_table.empty:
+        noticing_col = validation_table.get("initial_noticing")
+        if noticing_col is not None:
+            confirmed_noticings = [
+                str(v).strip() for v in noticing_col.fillna("").tolist()
+                if str(v).strip()
+            ]
+
+    # Build agent context
+    agent_context = {
+        "corpus": corpus,
+        "phase1": {
+            "reflexive_positioning": companion_reflexivity or "",
+            "familiarization_notes": facilitator_memo or "",
+            "confirmed_noticings": confirmed_noticings,
+        },
+        "orientation": orientation or "semantic",
+        "existing_codes_df": codes_df if iteration_n >= 2 else None,
+        "codebook": codebook_list,
+        "proposed_codes": {},
+    }
+
+    # Run the agent
+    try:
+        steps, reply, result_context = phase2_agent.run_phase2_iteration(
+            llm_provider=llm_provider,
+            llm_key=llm_key,
+            iteration_n=int(iteration_n),
+            context=agent_context,
+        )
+    except Exception as e:
+        return (
+            codes_df,
+            pd.DataFrame(codebook_list) if codebook_list else pd.DataFrame(columns=[
+                "code_name", "definition", "created_by", "provenance", "sentence_count",
+            ]),
+            f"**Phase 2 agent error:** {e}",
+        )
+
+    # Merge agent results into codes_df
+    # New shape: each proposed entry has "codes": [list of 1-5 strings]
+    proposed = result_context.get("proposed_codes", {})
+    ai_col = f"ai_code_iter{int(iteration_n)}"
+
+    for idx, code_dict in proposed.items():
+        if 0 <= int(idx) < len(codes_df):
+            codes_list = code_dict.get("codes", []) or []
+            if isinstance(codes_list, str):
+                codes_list = [codes_list]
+            combined = ", ".join(c for c in codes_list if c)
+            codes_df.at[int(idx), ai_col] = combined
+
+    # Update final_code column — latest human edit wins, else latest AI code
+    for i in range(len(codes_df)):
+        final = ""
+        for it in (3, 2, 1):
+            h = codes_df.at[i, f"human_code_iter{it}"]
+            if h and str(h).strip():
+                final = str(h).strip()
+                break
+        if not final:
+            for it in (3, 2, 1):
+                a = codes_df.at[i, f"ai_code_iter{it}"]
+                if a and str(a).strip():
+                    final = str(a).strip()
+                    break
+        codes_df.at[i, "final_code"] = final
+
+    # Build codebook DataFrame
+    updated_codebook = result_context.get("codebook", [])
+    codebook_df = pd.DataFrame(updated_codebook) if updated_codebook else pd.DataFrame(
+        columns=["code_name", "definition", "created_by", "provenance", "sentence_count"]
+    )
+
+    total_codes = sum(len(v.get("codes", [])) for v in proposed.values())
+    status = (
+        f"**Iteration {iteration_n} complete** ({orientation} orientation). "
+        f"Coded {len(proposed)} sentences with {total_codes} total codes "
+        f"(avg {total_codes/len(proposed) if proposed else 0:.1f} codes/sentence). "
+        f"Codebook has {len(updated_codebook)} entries. "
+        f"Agent took {len(steps)} steps. "
+        f"Reply: {reply[:200]}"
+    )
+    return codes_df, codebook_df, status
+
+
+def handle_p2_save(
+    corpus,
+    codes_table, codebook_table,
+    downloads_list,
+):
+    """Save Phase 2 outputs as a timestamped JSON artifact."""
+    dl = list(downloads_list or [])
+
+    if isinstance(codes_table, pd.DataFrame):
+        codes_rows = codes_table.fillna("").to_dict("records")
+    else:
+        codes_rows = []
+    if isinstance(codebook_table, pd.DataFrame):
+        codebook_rows = codebook_table.fillna("").to_dict("records")
+    else:
+        codebook_rows = []
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "phase2_initial_coding",
+        "methodology": "Braun & Clarke 2006 Phase 2 — Generating Initial Codes (agentic)",
+        "corpus_size": len(corpus or []),
+        "codes_table": codes_rows,
+        "codebook": codebook_rows,
+    }
+    path = save_json_artifact(artifact, "phase2_initial_coding")
+    dl.append(path)
+    status = (
+        f"**Saved Phase 2 initial coding output** — {len(codes_rows)} coded rows, "
+        f"{len(codebook_rows)} codebook entries. Artifact: `{path.split('/')[-1]}`"
+    )
+    return status, dl, dl
+
+
+
+
+# ----------------------------------------------------------------
+# Phase 3 -- Searching for Themes handlers (Braun & Clarke 2006)
+# ----------------------------------------------------------------
+def handle_p3_run(
+    codebook_table,
+    similarity_threshold,
+    min_cluster_size,
+    orientation,
+    companion_reflexivity,
+    llm_provider, llm_key,
+    downloads_list,
+):
+    dl = list(downloads_list or [])
+    empty_themes = pd.DataFrame(columns=[
+        "theme_id", "candidate_theme_name", "description", "rationale",
+        "member_codes", "code_count", "researcher_theme_name", "researcher_notes",
+    ])
+    empty_noise = pd.DataFrame(columns=["code_name", "definition"])
+
+    if not PHASE3_OK:
+        return (empty_themes, empty_noise,
+                f"**Phase 3 unavailable** -- {_phase3_err}", dl, dl)
+
+    if codebook_table is None or (isinstance(codebook_table, pd.DataFrame) and codebook_table.empty):
+        return (empty_themes, empty_noise,
+                "**Cannot run Phase 3** -- no codebook. Run Phase 2 first.", dl, dl)
+
+    key = (llm_key or "").strip() or os.environ.get("MISTRAL_API_KEY", "")
+    if not key:
+        return (empty_themes, empty_noise,
+                "**Cannot run Phase 3** -- Mistral API key missing.", dl, dl)
+
+    codebook_df = codebook_table.copy() if isinstance(codebook_table, pd.DataFrame) else pd.DataFrame(codebook_table)
+
+    try:
+        result = run_phase3_searching_themes(
+            codebook_df=codebook_df,
+            llm_provider=llm_provider or "Mistral",
+            llm_key=key,
+            similarity_threshold=float(similarity_threshold),
+            min_cluster_size=int(min_cluster_size),
+            orientation=orientation or "semantic",
+            reflexive_pos=companion_reflexivity or "",
+        )
+    except Exception as e:
+        return (empty_themes, empty_noise, f"**Phase 3 error:** {e}", dl, dl)
+
+    themes_df = pd.DataFrame(result["themes_rows"]) if result["themes_rows"] else empty_themes
+    noise_df = pd.DataFrame(result["noise_codes"]) if result["noise_codes"] else empty_noise
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "phase3_searching_themes",
+        "methodology": "Braun & Clarke 2006 Phase 3 -- Searching for Themes",
+        "similarity_threshold": float(similarity_threshold),
+        "min_cluster_size": int(min_cluster_size),
+        "orientation": orientation,
+        "n_themes": result["n_themes"],
+        "n_noise": result["n_noise"],
+        "themes": result["themes_rows"],
+        "noise_codes": result["noise_codes"],
+    }
+    path = save_json_artifact(artifact, "phase3_searching_themes")
+    dl.append(path)
+
+    status = (
+        "**Phase 3 complete.** "
+        + str(result["n_themes"]) + " candidate themes from "
+        + str(len(codebook_df)) + " codes. "
+        + str(result["n_noise"]) + " codes in noise bucket. "
+        + "Artifact: `" + path.split("/")[-1] + "`"
+    )
+    return themes_df, noise_df, status, dl, dl
+
+
+def handle_p3_save(themes_table, noise_table, downloads_list):
+    dl = list(downloads_list or [])
+    themes_rows = themes_table.fillna("").to_dict("records") if isinstance(themes_table, pd.DataFrame) else []
+    noise_rows = noise_table.fillna("").to_dict("records") if isinstance(noise_table, pd.DataFrame) else []
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "phase3_researcher_confirmed_themes",
+        "methodology": "Braun & Clarke 2006 Phase 3 -- Researcher-confirmed candidate themes",
+        "themes": themes_rows,
+        "noise_codes": noise_rows,
+    }
+    path = save_json_artifact(artifact, "phase3_themes")
+    dl.append(path)
+    status = (
+        "**Saved Phase 3 themes** -- "
+        + str(len(themes_rows)) + " themes, "
+        + str(len(noise_rows)) + " noise codes. Artifact: `" + path.split("/")[-1] + "`"
+    )
+    return status, dl, dl
+
+# ----------------------------------------------------------------
+# Vectorstore handlers — Vectorize + Vector DB sub-tabs
+# ----------------------------------------------------------------
+def handle_vectorize_preview(embedding_provider, embedding_key, downloads_list):
+    """Compute embeddings for the first 10 training sentences and show them."""
+    dl = list(downloads_list or [])
+    if not VECTORSTORE_OK:
+        return pd.DataFrame(), "vectorstore unavailable — check build logs", dl, dl
+
+    try:
+        rows = vectorstore.preview_vectors(
+            n=10,
+            embedding_provider=embedding_provider,
+            embedding_api_key=embedding_key,
+        )
+    except Exception as e:
+        return (
+            pd.DataFrame(),
+            f"Embedding failed on provider `{embedding_provider}`: {e}",
+            dl, dl,
+        )
+
+    df = pd.DataFrame(rows)
+    status = (
+        f"**Embedding provider:** `{embedding_provider}`  \n"
+        f"**Vector dim:** {rows[0]['vector_dim'] if rows else '?'}  \n"
+        f"Showing first 10 sentences with the first 8 of the vector dimensions."
+    )
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "vectorize_preview",
+        "embedding_provider": embedding_provider,
+        "preview_rows": rows,
+    }
+    path = save_json_artifact(artifact, "vectors_preview")
+    dl.append(path)
+    return df, status, dl, dl
+
+
+def handle_vector_index(embedding_provider, embedding_key, downloads_list):
+    """Embed all 100 sentences and write them to ChromaDB."""
+    dl = list(downloads_list or [])
+    if not VECTORSTORE_OK:
+        return "vectorstore unavailable — check build logs", dl, dl
+
+    try:
+        result = vectorstore.index_training_data(
+            embedding_provider=embedding_provider,
+            embedding_api_key=embedding_key,
+        )
+    except Exception as e:
+        return (
+            f"Indexing failed on provider `{embedding_provider}`: {e}",
+            dl, dl,
+        )
+
+    status = (
+        f"**Indexed {result['indexed']} sentences** into ChromaDB collection "
+        f"`{result['collection_name']}`.  \n"
+        f"**Vector dim:** {result['vector_dim']}  \n"
+        f"**Embedding provider:** `{result['embedding_provider']}`  \n"
+        f"**Embedding model:** `{result['embedding_model']}`  \n"
+        f"**Persist dir:** `{result['persist_dir']}`"
+    )
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "vector_index",
+        **result,
+    }
+    path = save_json_artifact(artifact, "vector_index")
+    dl.append(path)
+    return status, dl, dl
+
+
+def handle_vector_search(query, n_results,
+                         embedding_provider, embedding_key, downloads_list):
+    """Semantic search — embed query and retrieve top-N nearest sentences."""
+    dl = list(downloads_list or [])
+    if not VECTORSTORE_OK:
+        return pd.DataFrame(), "vectorstore unavailable — check build logs", dl, dl
+
+    if not query or not query.strip():
+        return pd.DataFrame(), "Enter a query to search.", dl, dl
+
+    try:
+        hits = vectorstore.search(
+            query.strip(),
+            n_results=int(n_results),
+            embedding_provider=embedding_provider,
+            embedding_api_key=embedding_key,
+        )
+    except Exception as e:
+        return (
+            pd.DataFrame(),
+            f"Search failed on provider `{embedding_provider}`: {e}",
+            dl, dl,
+        )
+
+    if not hits:
+        return (
+            pd.DataFrame(),
+            "No results. Have you indexed the collection yet? "
+            "Click 'Index all 100 sentences' in the Vector DB tab first. "
+            "Note: indexing and searching must use the SAME embedding provider "
+            "because vector dimensions differ between providers.",
+            dl, dl,
+        )
+
+    df = pd.DataFrame([
+        {
+            "rank": i + 1,
+            "similarity": round(h["similarity"], 4),
+            "label": h["label"],
+            "sentence": h["sentence"],
+        }
+        for i, h in enumerate(hits)
+    ])
+    status = f"**Query:** `{query}` — found {len(hits)} nearest neighbors"
+
+    artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "vector_search",
+        "query": query,
+        "n_results": int(n_results),
+        "embedding_provider": embedding_provider,
+        "hits": hits,
+    }
+    path = save_json_artifact(artifact, "vector_search")
+    dl.append(path)
+    return df, status, dl, dl
+
+
+def handle_vector_clear(downloads_list):
+    """Drop all rows from the Chroma collection."""
+    dl = list(downloads_list or [])
+    if not VECTORSTORE_OK:
+        return "vectorstore unavailable", dl, dl
+
+    result = vectorstore.clear_collection()
+    stats = vectorstore.collection_stats()
+    status = f"**Cleared {result['cleared']} vectors.** Collection now has {stats['count']} rows."
+    return status, dl, dl
+
+
+def clear_vectorize_preview():
+    return pd.DataFrame(), "Click 'Preview embeddings' to see sentence vectors."
+
+
+# ----------------------------------------------------------------
+# Main chat handler
+# ----------------------------------------------------------------
+# Only the two raw-SDK backends (Workflow, Simple Python Agent) respect
+# the chosen LLM provider. Framework backends are pinned to Mistral
+# because each framework wires its LLM differently and swapping them
+# per-provider is a larger rewrite.
+PROVIDER_AWARE_BACKENDS = {"Workflow", "Simple Python Agent"}
+
+
+def process_message(user_message, mode, llm_provider, llm_key,
+                    chat_history, loaded_context, downloads_list):
+    dl = list(downloads_list or [])
+
+    if not user_message or not user_message.strip():
+        return chat_history, pd.DataFrame(), "", pd.DataFrame(), "", dl, dl, ""
+
+    backend = BACKENDS.get(mode)
+    if backend is None:
+        return chat_history, pd.DataFrame(), "", pd.DataFrame(), \
+               f"# Unknown backend: {mode}", dl, dl, ""
+
+    # Framework backends always use Mistral; raw-SDK backends use chosen provider
+    effective_provider = llm_provider if mode in PROVIDER_AWARE_BACKENDS else "Mistral"
+
+    try:
+        if mode in PROVIDER_AWARE_BACKENDS:
+            client = backend.get_client(llm_key, provider=effective_provider)
+        else:
+            client = backend.get_client(llm_key)
+    except Exception as e:
+        err = f"# Could not create client for {effective_provider}: {e}"
+        return chat_history, pd.DataFrame(), "", pd.DataFrame(), err, dl, dl, ""
+
+    # ----------------------------------------------------------------
+    # Dispatch: ringmaster-aware backend vs legacy backend
+    # ----------------------------------------------------------------
+    is_ringmaster = hasattr(backend, "run_ringmaster")
+
+    if is_ringmaster:
+        # Ringmaster receives the raw user message plus a context dict
+        # holding session state. The supervisor calls check_data_status
+        # as its first tool, so we must NOT prefix the message with the
+        # loaded data the way legacy backends do.
+        ringmaster_context = {
+            "loaded_context": loaded_context or "",
+            "llm_provider": effective_provider,
+            "llm_key": llm_key or "",
+            "cgt_result": None,
+            "cta_result": None,
+        }
+        try:
+            result = backend.run_ringmaster(client, user_message, ringmaster_context)
+        except Exception as e:
+            err_reply = f"(error from {mode} / {effective_provider}: {e})"
+            new_history = (chat_history or []) + [
+                {"role": "user", "content": user_message},
+                {"role": "assistant", "content": err_reply},
+            ]
+            return new_history, pd.DataFrame(), "", pd.DataFrame(), "", dl, dl, ""
+    else:
+        # Legacy path: prefix loaded_context into the message text, call
+        # backend.run(client, message) or backend.run(client, message, provider=...)
+        if loaded_context:
+            effective_message = (
+                f"Available data:\n{loaded_context[:MAX_CONTEXT_CHARS]}\n\n"
+                f"User question: {user_message}"
+            )
+        else:
+            effective_message = user_message
+
+        try:
+            if mode in PROVIDER_AWARE_BACKENDS:
+                result = backend.run(client, effective_message, provider=effective_provider)
+            else:
+                result = backend.run(client, effective_message)
+        except Exception as e:
+            err_reply = f"(error from {mode} / {effective_provider}: {e})"
+            new_history = (chat_history or []) + [
+                {"role": "user", "content": user_message},
+                {"role": "assistant", "content": err_reply},
+            ]
+            return new_history, pd.DataFrame(), "", pd.DataFrame(), "", dl, dl, ""
+
+    new_history = (chat_history or []) + [
+        {"role": "user", "content": user_message},
+        {"role": "assistant", "content": result["reply"]},
+    ]
+
+    steps_df, extracted_json, chart_df, code_snippet = build_outputs(
+        user_message, mode, result
+    )
+
+    # For the artifact log, record what was actually sent to the backend.
+    # Ringmaster receives the raw user_message; legacy backends may receive
+    # the prefixed effective_message.
+    logged_effective = effective_message if not is_ringmaster else user_message
+
+    run_artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": f"chat_run_{mode.lower()}",
+        "mode": mode,
+        "llm_provider": effective_provider,
+        "user_message": user_message,
+        "effective_message": logged_effective,
+        "reply": result["reply"],
+        "steps": result["steps"],
+        "extracted": result["extracted"],
+    }
+    run_path = save_json_artifact(run_artifact, f"run_{mode.lower()}")
+    dl.append(run_path)
+
+    return (
+        new_history, steps_df, extracted_json, chart_df, code_snippet,
+        dl, dl, "",
+    )
+
+
+# ----------------------------------------------------------------
+# Form submission — saves a form JSON, then routes through process_message
+# ----------------------------------------------------------------
+def submit_form(task_type, operation, num_a, num_b, city, notes,
+                mode, llm_provider, llm_key, chat_history, loaded_context, downloads_list):
+    dl = list(downloads_list or [])
+
+    form_artifact = {
+        "timestamp": datetime.now().isoformat(),
+        "source_type": "form_submission",
+        "task_type": task_type,
+        "operation": operation,
+        "number_a": num_a,
+        "number_b": num_b,
+        "city": city,
+        "notes": notes,
+    }
+    form_path = save_json_artifact(form_artifact, "form")
+    dl.append(form_path)
+
+    builders = {
+        "Math": lambda: f"Calculate {num_a} {operation.lower()} {num_b}",
+        "Weather": lambda: f"What is the weather in {city}?",
+        "General": lambda: notes or "Hello",
+    }
+    user_message = builders[task_type]()
+    return process_message(user_message, mode, llm_provider, llm_key,
+                           chat_history, loaded_context, dl)
+
+
+def clear_form():
+    return "Math", "Add", 0, 0, "", ""
+
+
+def new_chat(downloads_list):
+    dl = list(downloads_list or [])
+    return [], pd.DataFrame(), "", pd.DataFrame(), "", dl, dl, ""
+
+
+# ============================================================================
+# ZONE 4 — UI definition (gr.Blocks)
+# ============================================================================
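+# Layout tree:
+#   Row
+#   +-- Column (sidebar): new chat, LLM provider + key, embedding provider
+#   |                     + key, agent backend (mode), tab guide
+#   +-- Column (main):
+#       +-- Chatbot (display)
+#       +-- Row: chat_input + send_btn
+#       +-- Tabs (top-level)
+#           +-- Inputs                 (Tab)
+#           |   +-- Data sources
+#           |   |   +-- Web scraping
+#           |   |   +-- PDF upload
+#           |   |   +-- CSV / Excel upload
+#           |   |   +-- ML examples
+#           |   +-- Form
+#           +-- Processing / Analysis  (Tab)
+#           |   +-- Supervised Machine Learning    (Dataset, Train, Predict)
+#           |   +-- Unsupervised Machine Learning  (Dataset, Cluster)
+#           |   +-- Vector Processing              (Vectorize, Vector DB)
+#           +-- Outputs                (Tab)
+#           |   +-- Results            (Table, Code, Extracted)
+#           |   +-- Visuals
+#           |   +-- Downloads
+#           +-- Researcher Workbench   (Tab)
+#               +-- one sub-tab per research methodology
+#                   (e.g. Computational Grounded Theory)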
+#
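+# gr.State OBJECTS persist values across clicks:
+#   loaded_context_state -> text from the last loaded data source
+#   downloads_state      -> list of file paths, grows as artifacts are created
+#   trained_state        -> starts as None; presumably holds the trained
+#                           classifier (confirm against the Train handler)
+#   p1_corpus_state      -> Phase 1 Familiarization corpus (list of dicts)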
+# ----------------------------------------------------------------
+# UI
+# ----------------------------------------------------------------
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange"), title="Agentic AI Tutorial") as demo:
+    gr.Markdown("# Agentic AI Tutorial — Seven Backends, One Chat")
+    gr.Markdown(
+        "A hands-on comparison of seven ways to build the same agent: "
+        "**Workflow**, **Simple Python Agent** (raw Mistral SDK), "
+        "**LangChain**, **LangGraph** (supervisor pattern), "
+        "**smolagents** (code-writing), **CrewAI** (multi-agent), "
+        "and **LlamaIndex**. Same Mistral LLM, same tools, different orchestration. "
+        "Every input and every run is saved as a timestamped JSON file in the Downloads tab."
+    )
+
+    loaded_context_state = gr.State("")
+    downloads_state = gr.State([])
+    trained_state = gr.State(None)
+    # Phase 1 Familiarization state — canonical corpus CSV (list of dicts)
+    p1_corpus_state = gr.State([])
+
+    with gr.Row():
+
+        # ---------------- Sidebar ----------------
+        with gr.Column(scale=1, min_width=220):
+            new_chat_btn = gr.Button("+ New chat", variant="primary")
+
+            gr.Markdown("### LLM provider")
+            gr.Markdown(
+                "*This release is locked to **Mistral**. Other providers "
+                "(OpenAI, Anthropic, Gemini, Llama, Qwen, DeepSeek) will "
+                "be enabled in a future release once the ringmaster workflow "
+                "is stable.*"
+            )
+            llm_provider_select = gr.Dropdown(
+                choices=list(providers.LLM_PROVIDERS.keys()),
+                value="Mistral",
+                label="LLM provider",
+                interactive=False,
+                info="Locked to Mistral for this release.",
+            )
+            llm_key_input = gr.Textbox(
+                label="LLM API key",
+                type="password",
+                placeholder="paste your Mistral API key",
+            )
+
+            gr.Markdown("### Embedding provider")
+            gr.Markdown(
+                "*This release is locked to **MiniLM (local)**. MiniLM is "
+                "a 384-dim sentence-transformers model that downloads once "
+                "on first use (~90 MB) and then runs locally with no API "
+                "key. Other embedding providers will be enabled in a "
+                "future release.*"
+            )
+            embedding_provider_select = gr.Dropdown(
+                choices=list(providers.EMBEDDING_PROVIDERS.keys()),
+                value="MiniLM (local)",
+                label="Embedding provider",
+                interactive=False,
+                info="Locked to MiniLM (local) for this release.",
+            )
+            embedding_key_input = gr.Textbox(
+                label="Embedding API key",
+                type="password",
+                placeholder="not needed for MiniLM (local)",
+                interactive=False,
+            )
+
+            gr.Markdown("### Agent backend")
+            gr.Markdown(
+                "*This release is locked to **Research Assistant enabled by "
+                "Vector Embeddings** — the chat-driven coordinator that calls "
+                "the research workbenches as tools. Other backends (Workflow, "
+                "Simple Python Agent, LangChain, LangGraph, smolagents, "
+                "CrewAI, LlamaIndex) will be enabled in a future release.*"
+            )
+            _mode_choices = list(BACKENDS.keys()) or ["(no backends loaded)"]
+            # Prefer Research Assistant as the default if present
+            if "Research Assistant enabled by Vector Embeddings" in _mode_choices:
+                _mode_default = "Research Assistant enabled by Vector Embeddings"
+            else:
+                _mode_default = _mode_choices[0]
+            mode_select = gr.Radio(
+                choices=_mode_choices,
+                value=_mode_default,
+                label="Backend",
+                interactive=False,
+                info="Locked to Research Assistant for this release.",
+            )
+
+            gr.Markdown("### Tab guide")
+            gr.Markdown(
+                "**Inputs**\n"
+                "- Data sources\n"
+                "- Form\n\n"
+                "**Processing / Analysis**\n"
+                "- Supervised Machine Learning\n"
+                "- Unsupervised Machine Learning\n"
+                "- Vector Processing\n\n"
+                "**Outputs**\n"
+                "- Results\n"
+                "- Visuals\n"
+                "- Downloads"
+            )
+
+        # ---------------- Main area ----------------
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(height=320, label="Conversation")
+
+            with gr.Row():
+                chat_input = gr.Textbox(
+                    placeholder="Message the agent...",
+                    show_label=False,
+                    scale=5,
+                )
+                send_btn = gr.Button("Send", scale=1, variant="primary")
+
+            with gr.Tabs():
+
+                # =================== INPUTS ===================
+                with gr.Tab("Inputs"):
+                    with gr.Tabs():
+                        with gr.Tab("Data sources"):
+                            gr.Markdown(
+                                "Load external data as context. Each load is saved "
+                                "as a timestamped JSON file in the Downloads tab."
+                            )
+
+                            with gr.Tabs():
+
+                                with gr.Tab("Web scraping"):
+                                    url_input = gr.Textbox(
+                                        label="URL", placeholder="https://example.com",
+                                    )
+                                    with gr.Row():
+                                        scrape_btn = gr.Button("Scrape", variant="primary")
+                                        scrape_clear_btn = gr.Button("Clear")
+                                    scrape_preview = gr.Textbox(
+                                        label="Extracted text", lines=8, interactive=False,
+                                    )
+                                    scrape_status = gr.Markdown("Nothing loaded.")
+
+                                with gr.Tab("PDF upload"):
+                                    pdf_input = gr.File(
+                                        label="Upload PDF", file_types=[".pdf"],
+                                    )
+                                    with gr.Row():
+                                        pdf_extract_btn = gr.Button("Extract text", variant="primary")
+                                        pdf_clear_btn = gr.Button("Clear")
+                                    pdf_preview = gr.Textbox(
+                                        label="Extracted text", lines=8, interactive=False,
+                                    )
+                                    pdf_status = gr.Markdown("Nothing loaded.")
+
+                                with gr.Tab("CSV / Excel upload"):
+                                    csv_input = gr.File(
+                                        label="Upload CSV or Excel",
+                                        file_types=[".csv", ".xlsx", ".xls"],
+                                    )
+                                    with gr.Row():
+                                        csv_load_btn = gr.Button("Load", variant="primary")
+                                        csv_clear_btn = gr.Button("Clear")
+                                    csv_preview = gr.Dataframe(
+                                        label="Preview (first 20 rows)", interactive=False,
+                                    )
+                                    csv_status = gr.Markdown("Nothing loaded.")
+
+                                with gr.Tab("ML examples"):
+                                    gr.Markdown(
+                                        "Load the built-in catalog of labeled ML paper "
+                                        "sentences as context. No upload needed — the "
+                                        "dataset lives in examples.py."
+                                    )
+                                    with gr.Row():
+                                        ml_load_btn = gr.Button("Load catalog", variant="primary")
+                                        ml_clear_btn = gr.Button("Clear")
+                                    ml_preview = gr.Textbox(
+                                        label="Catalog preview", lines=10, interactive=False,
+                                    )
+                                    ml_status = gr.Markdown("Nothing loaded.")
+
+                        with gr.Tab("Form"):
+                            gr.Markdown(
+                                "Fill structured fields and hit Submit. Generates a chat "
+                                "message and saves the form fields as their own JSON file."
+                            )
+                            form_task = gr.Dropdown(
+                                ["Math", "Weather", "General"],
+                                value="Math", label="Task type",
+                            )
+                            form_op = gr.Dropdown(
+                                ["Add", "Multiply"],
+                                value="Add", label="Operation (Math only)",
+                            )
+                            with gr.Row():
+                                form_a = gr.Number(label="Number A", value=0)
+                                form_b = gr.Number(label="Number B", value=0)
+                            form_city = gr.Textbox(
+                                label="City (Weather only)", placeholder="e.g. Tokyo",
+                            )
+                            form_notes = gr.Textbox(
+                                label="Notes (General only)", lines=2,
+                            )
+                            with gr.Row():
+                                form_submit = gr.Button("Submit", variant="primary")
+                                form_clear = gr.Button("Clear")
+
+                        # =================== SUPERVISED MACHINE LEARNING ===================
+                # =================== PROCESSING / ANALYSIS ===================
+                with gr.Tab("Processing / Analysis"):
+                    with gr.Tabs():
+                        with gr.Tab("Supervised Machine Learning"):
+                            gr.Markdown(
+                                "**Supervised ML** on the built-in 100-sentence customer-feedback "
+                                "dataset (6 labels). Uses semantic embeddings from "
+                                "`sentence-transformers/all-MiniLM-L6-v2` + logistic regression. "
+                                "No LLM involved."
+                            )
+
+                            with gr.Tabs():
+
+                                with gr.Tab("Dataset"):
+                                    gr.Markdown(
+                                        "The 100 labeled sentences the classifier learns from. "
+                                        "Six labels, roughly balanced: positive_review, "
+                                        "negative_review, question, complaint, compliment, "
+                                        "feature_request."
+                                    )
+                                    sup_label_filter = gr.Dropdown(
+                                        choices=["(all)"] + list(sorted(
+                                            {e["label"] for e in TRAINING_EXAMPLES}
+                                        )),
+                                        value="(all)",
+                                        label="Filter by label",
+                                    )
+                                    sup_dataset_view = gr.Dataframe(
+                                        value=pd.DataFrame(TRAINING_EXAMPLES),
+                                        label=f"Training dataset ({len(TRAINING_EXAMPLES)} sentences)",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                                with gr.Tab("Train"):
+                                    gr.Markdown(
+                                        "Click Train to fit a logistic regression classifier on "
+                                        "semantic embeddings of 80 sentences (stratified split), "
+                                        "then evaluate on the remaining 20."
+                                    )
+                                    with gr.Row():
+                                        train_btn = gr.Button("Train classifier", variant="primary")
+                                        train_clear_btn = gr.Button("Clear")
+                                    train_status = gr.Markdown("Not trained yet.")
+                                    confusion_out = gr.Dataframe(
+                                        label="Confusion matrix (rows=actual, cols=predicted)",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                                with gr.Tab("Predict"):
+                                    gr.Markdown(
+                                        "Type a new sentence to classify. The classifier must "
+                                        "be trained first — go to the Train sub-tab and click "
+                                        "Train classifier before using this panel."
+                                    )
+                                    predict_input = gr.Textbox(
+                                        label="Sentence",
+                                        placeholder="e.g. this product is amazing",
+                                        lines=2,
+                                    )
+                                    predict_btn = gr.Button("Predict", variant="primary")
+                                    predict_out = gr.Markdown("No prediction yet.")
+
+                        # =================== UNSUPERVISED MACHINE LEARNING ===================
+                        with gr.Tab("Unsupervised Machine Learning"):
+                            gr.Markdown(
+                                "**Unsupervised ML** on the same 100-sentence dataset with the "
+                                "labels hidden from the algorithm. Uses semantic embeddings from "
+                                "`sentence-transformers/all-MiniLM-L6-v2` + **Hierarchical "
+                                "Agglomerative Clustering** with cosine distance."
+                            )
+
+                            with gr.Tabs():
+
+                                with gr.Tab("Dataset"):
+                                    gr.Markdown(
+                                        "The 100 sentences the clustering algorithm sees. "
+                                        "Labels are hidden here on purpose — unsupervised "
+                                        "learning works without them. After clustering runs, "
+                                        "the Cluster sub-tab compares discovered clusters to "
+                                        "the true labels so you can see what the algorithm "
+                                        "figured out on its own."
+                                    )
+                                    unsup_dataset_view = gr.Dataframe(
+                                        value=pd.DataFrame(
+                                            [{"sentence": e["sentence"]} for e in TRAINING_EXAMPLES]
+                                        ),
+                                        label=f"Sentences only ({len(TRAINING_EXAMPLES)} rows, no labels)",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                                with gr.Tab("Cluster"):
+                                    gr.Markdown(
+                                        "**Hierarchical Agglomerative Clustering** on "
+                                        "semantic embeddings. Clusters emerge from a "
+                                        "similarity threshold instead of a fixed count. "
+                                        "Small clusters become **noise**. Each surviving "
+                                        "cluster exposes its **centroid** and the "
+                                        "**N nearest-to-centroid** sentences as "
+                                        "representatives — optionally sent to an LLM "
+                                        "for an automatic cluster label."
+                                    )
+                                    cluster_sim = gr.Slider(
+                                        0.40, 0.90, value=0.60, step=0.05,
+                                        label="Similarity threshold",
+                                        info="Minimum cosine similarity between vectors to merge.",
+                                    )
+                                    cluster_min = gr.Slider(
+                                        2, 10, value=3, step=1,
+                                        label="Minimum cluster size",
+                                        info="Clusters smaller than this are reassigned to noise.",
+                                    )
+                                    cluster_nnear = gr.Slider(
+                                        1, 10, value=3, step=1,
+                                        label="N nearest-to-centroid",
+                                        info="How many representative sentences to pick per cluster.",
+                                    )
+                                    cluster_llm_toggle = gr.Checkbox(
+                                        label="Label clusters with LLM",
+                                        value=False,
+                                        info="Sends the N nearest sentences per cluster to the sidebar LLM provider for a short label. Adds ~2s per cluster.",
+                                    )
+                                    with gr.Row():
+                                        cluster_btn = gr.Button("Cluster", variant="primary")
+                                        cluster_clear_btn = gr.Button("Clear")
+                                    cluster_status = gr.Markdown("Not clustered yet.")
+                                    cluster_out = gr.Dataframe(
+                                        label="Sentence-level cluster table",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                        # =================== VECTOR PROCESSING ===================
+                        with gr.Tab("Vector Processing"):
+                            gr.Markdown(
+                                "**Semantic vector storage and retrieval** using ChromaDB "
+                                "as a persistent on-disk vector database.  \n"
+                                "Same embedding model as Supervised / Unsupervised ML "
+                                "(`sentence-transformers/all-MiniLM-L6-v2`), 384 dimensions, "
+                                "cosine similarity. Every sentence is stored with its label "
+                                "as metadata so retrieval results include ground-truth labels."
+                            )
+
+                            with gr.Tabs():
+
+                                with gr.Tab("Vectorize"):
+                                    gr.Markdown(
+                                        "See what a sentence embedding actually looks like. "
+                                        "Click Preview to compute embeddings for the first "
+                                        "10 training sentences and show the first 8 dimensions "
+                                        "of each 384-dim vector."
+                                    )
+                                    with gr.Row():
+                                        vectorize_btn = gr.Button(
+                                            "Preview embeddings", variant="primary",
+                                        )
+                                        vectorize_clear_btn = gr.Button("Clear")
+                                    vectorize_status = gr.Markdown(
+                                        "Click 'Preview embeddings' to see sentence vectors."
+                                    )
+                                    vectorize_out = gr.Dataframe(
+                                        label="Sentences with embedding preview",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                                with gr.Tab("Vector DB"):
+                                    gr.Markdown(
+                                        "**ChromaDB-backed persistent vector store.**  \n"
+                                        "Step 1: Click 'Index all 100 sentences' once per "
+                                        "session to embed the training data and write it to "
+                                        "the local Chroma collection.  \n"
+                                        "Step 2: Type a query and click 'Semantic search' to "
+                                        "retrieve the nearest training sentences. The results "
+                                        "show cosine similarity and the ground-truth label "
+                                        "from the metadata."
+                                    )
+
+                                    gr.Markdown("### Index")
+                                    with gr.Row():
+                                        vector_index_btn = gr.Button(
+                                            "Index all 100 sentences", variant="primary",
+                                        )
+                                        vector_clear_btn = gr.Button("Clear index")
+                                    vector_index_status = gr.Markdown("Not indexed yet.")
+
+                                    gr.Markdown("### Semantic search")
+                                    vector_query = gr.Textbox(
+                                        label="Query",
+                                        placeholder="e.g. the app keeps crashing",
+                                        lines=2,
+                                    )
+                                    vector_n = gr.Slider(
+                                        1, 10, value=5, step=1,
+                                        label="Number of results",
+                                    )
+                                    vector_search_btn = gr.Button(
+                                        "Semantic search", variant="primary",
+                                    )
+                                    vector_search_status = gr.Markdown(
+                                        "Enter a query and click 'Semantic search'."
+                                    )
+                                    vector_search_out = gr.Dataframe(
+                                        label="Nearest neighbors (cosine similarity)",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+
+                # =================== OUTPUTS ===================
+                with gr.Tab("Outputs"):
+                    with gr.Tabs():
+                        with gr.Tab("Results"):
+                            with gr.Tabs():
+                                with gr.Tab("Table"):
+                                    gr.Markdown("Step log for the most recent run.")
+                                    table_out = gr.Dataframe(
+                                        headers=["step", "type", "tool", "args", "result"],
+                                        label="",
+                                        wrap=True,
+                                    )
+                                with gr.Tab("Code"):
+                                    gr.Markdown("Python snippets for the most recent run.")
+                                    code_out = gr.Code(language="python", label="")
+                                with gr.Tab("Extracted"):
+                                    gr.Markdown("What the agent parsed from the most recent run.")
+                                    extracted_out = gr.Code(language="json", label="")
+
+                        with gr.Tab("Visuals"):
+                            gr.Markdown("Tool-call counts for the most recent run.")
+                            chart_out = gr.BarPlot(
+                                x="tool", y="count",
+                                title="", tooltip=["tool", "count"],
+                                height=280,
+                            )
+
+                        with gr.Tab("Downloads"):
+                            gr.Markdown(
+                                "Every input and every run is saved here as a "
+                                "timestamped JSON file. Files accumulate across the session."
+                            )
+                            downloads_files_out = gr.File(
+                                label="All artifacts (timestamped JSON)",
+                                file_count="multiple",
+                                interactive=False,
+                            )
+                # ======================= RESEARCHER WORKBENCH (parent tab) =======================
+                with gr.Tab("Researcher Workbench"):
+                    gr.Markdown(
+                        "**Researcher Workbench** groups two self-contained "
+                        "LangGraph supervisor workflows that apply published "
+                        "research methodologies to the training data. Each "
+                        "methodology has its own sub-tab with its own state, "
+                        "prompts, tools, and supervisor."
+                    )
+                    with gr.Tabs():
+
+                        # ==================== COMPUTATIONAL GROUNDED THEORY ====================
+                        with gr.Tab("Computational Grounded Theory"):  # layout only: request box, three clustering sliders, run button, reply + two result tables
+                            gr.Markdown(
+                                "**Nelson 2020** — three-step methodological framework. "
+                                "A LangGraph supervisor routes the request through three "
+                                "phase nodes in order:  \n"
+                                "1. **Pattern Detection** — inductive clustering + LLM labeling (real)  \n"
+                                "2. **Pattern Refinement** — interpretive review (placeholder)  \n"
+                                "3. **Pattern Confirmation** — classifier validation (placeholder)  \n\n"
+                                "Maps to traditional grounded theory: open -> axial -> selective coding."
+                            )
+                            wb_cgt_msg = gr.Textbox(
+                                label="Request to the supervisor",
+                                value="Run computational grounded theory on the training data.",  # pre-filled default request
+                                lines=2,
+                            )
+                            with gr.Row():  # the three clustering parameters sit side by side
+                                wb_cgt_sim = gr.Slider(
+                                    0.40, 0.90, value=0.60, step=0.05,  # positional args are (minimum, maximum)
+                                    label="Similarity threshold",
+                                )
+                                wb_cgt_min = gr.Slider(
+                                    2, 10, value=3, step=1,  # positional args are (minimum, maximum)
+                                    label="Minimum cluster size",
+                                )
+                                wb_cgt_nnear = gr.Slider(
+                                    1, 10, value=3, step=1,  # positional args are (minimum, maximum)
+                                    label="N nearest to centroid",
+                                )
+                            with gr.Row():
+                                wb_cgt_run = gr.Button("Run Workbench", variant="primary")  # no .click() wiring in this section; presumably attached elsewhere in the file
+                            wb_cgt_reply = gr.Markdown("Not run yet.")  # initial placeholder text
+                            gr.Markdown("### Graph execution trace")
+                            wb_cgt_trace = gr.Dataframe(
+                                headers=["step", "node", "action", "detail"],  # fixed four-column trace schema
+                                label="Supervisor routing + node invocations",
+                                interactive=False,  # not editable by the user
+                                wrap=True,
+                            )
+                            gr.Markdown("### Pattern Detection output (Step 1)")
+                            wb_cgt_sentences = gr.Dataframe(  # no headers given: columns presumably come from the data the handler supplies — TODO confirm
+                                label="Sentences with cluster id + LLM cluster label",
+                                interactive=False,  # not editable by the user
+                                wrap=True,
+                            )
+
+                        # ==================== COMPUTATIONAL THEMATIC ANALYSIS ====================
+                        with gr.Tab("Computational Thematic Analysis"):
+                            gr.Markdown(
+                                "**Braun & Clarke 2006** — six-phase reflexive thematic analysis. "
+                                "This workbench groups two complementary paths:  \n"
+                                "- **Workbench** — the LangGraph supervisor approach (Phase 2 real, rest placeholders)  \n"
+                                "- **Phase 1 — Familiarization** — active-reading dialogue via grounded "
+                                "dialogue partners, followed by researcher confirmation of each initial noticing"
+                            )
+                            with gr.Tabs():
+                                # ------------ Workbench (LangGraph) path ------------
+                                with gr.Tab("Workbench (LangGraph)"):  # layout only: request box, sentence-cap slider, run button, reply + two result tables
+                                    gr.Markdown(
+                                        "Six-phase supervisor routing via LangGraph:  \n"
+                                        "1. **Familiarization** (placeholder)  \n"
+                                        "2. **Generating Initial Codes** — LLM codes each sentence (real)  \n"
+                                        "3. **Searching for Themes** (placeholder)  \n"
+                                        "4. **Reviewing Themes** (placeholder)  \n"
+                                        "5. **Defining and Naming Themes** (placeholder)  \n"
+                                        "6. **Producing the Report** (placeholder)"
+                                    )
+                                    wb_cta_msg = gr.Textbox(
+                                        label="Request to the supervisor",
+                                        value="Run reflexive thematic analysis on the training data.",  # pre-filled default request
+                                        lines=2,
+                                    )
+                                    wb_cta_max = gr.Slider(
+                                        5, 100, value=20, step=5,  # positional args are (minimum, maximum)
+                                        label="Max sentences to code",
+                                        info="One LLM call per sentence in Phase 2. "
+                                             "Default 20 keeps runtime under ~40 seconds.",
+                                    )
+                                    wb_cta_run = gr.Button("Run Workbench", variant="primary")  # no .click() wiring in this section; presumably attached elsewhere in the file
+                                    wb_cta_reply = gr.Markdown("Not run yet.")  # initial placeholder text
+                                    gr.Markdown("### Graph execution trace")
+                                    wb_cta_trace = gr.Dataframe(
+                                        headers=["step", "node", "action", "detail"],  # fixed four-column trace schema
+                                        label="Supervisor routing + node invocations",
+                                        interactive=False,  # not editable by the user
+                                        wrap=True,
+                                    )
+                                    gr.Markdown("### Phase 2 output — Initial Codes")
+                                    wb_cta_codes = gr.Dataframe(  # no headers given: columns presumably come from the data the handler supplies — TODO confirm
+                                        label="Sentences with LLM-generated codes",
+                                        interactive=False,  # not editable by the user
+                                        wrap=True,
+                                    )
+
+                                # ------------ Phase 1 — Familiarization ------------
+                                with gr.Tab("Phase 1 — Familiarization"):  # layout only: corpus loader, two paste-in dialogue steps, editable confirmation table, save button
+                                    gr.Markdown(
+                                        "## Phase 1 — Familiarizing Yourself With Your Data\n\n"
+                                        "*Braun & Clarke 2006, Phase 1: \"immerse yourself in the data "
+                                        "to the extent that you are familiar with the depth and breadth "
+                                        "of the content\"* (p. 87).\n\n"
+                                        "This workbench implements Phase 1 through a three-step "
+                                        "active-reading protocol. Two complementary dialogue partners "
+                                        "(implemented as Gemini Gems backed by NotebookLM) guide the "
+                                        "researcher through immersion and reflexive engagement, "
+                                        "followed by researcher confirmation of every initial noticing "
+                                        "against its source evidence.\n\n"
+                                        "**Step 1 — Familiarization Facilitator** — an active-reading "
+                                        "dialogue partner that asks grounded questions, surfaces "
+                                        "patterns, and prompts the researcher to articulate initial "
+                                        "noticings. Every response is anchored in direct quotation "
+                                        "from the source corpus.  \n"
+                                        "**Step 2 — Reflexive Companion** — a critical dialogue partner "
+                                        "that challenges the researcher's initial noticings, probes "
+                                        "reflexive positioning, and verifies dataset immersion "
+                                        "coverage across all sources.  \n"
+                                        "**Step 3 — Researcher Confirmation** — the researcher reviews "
+                                        "each initial noticing against its source sentence and "
+                                        "confirms, refines, or rejects it. This forces active "
+                                        "engagement with the evidence and is the researcher's own "
+                                        "analytic act — not the dialogue partner's.\n\n"
+                                        "**Braun & Clarke 2006 compliance target:** ≥90% when both "
+                                        "dialogue partners are engaged with iteration. Unclosable "
+                                        "gaps documented in COMPLIANCE.md: felt sense of the data "
+                                        "(phenomenological, unautomatable), and time-on-task "
+                                        "verification (researcher's own responsibility)."
+                                    )
+
+                                    # ---- Corpus loader: built-in test CSV button or CSV upload, then status line + preview table ----
+                                    gr.Markdown("### Corpus — Canonical CSV")
+                                    gr.Markdown(
+                                        "*Phase 1 consumes a canonical CSV with five columns: "
+                                        "`doc_id`, `doc_title`, `section`, `sub_section`, `sentence`. "
+                                        "Inputs tab transformers (PDF→CSV, web scrape→CSV) will "
+                                        "produce this schema in a future round. For pipeline testing, "
+                                        "load the built-in test corpus.*"
+                                    )
+                                    with gr.Row():  # the two ways of supplying a corpus sit side by side
+                                        p1_load_test_btn = gr.Button(
+                                            "Load test_phase1.csv",
+                                            variant="secondary",
+                                        )
+                                        p1_upload_csv = gr.File(
+                                            label="Or upload your own canonical CSV",
+                                            file_types=[".csv"],  # upload picker is limited to .csv files
+                                        )
+                                    p1_corpus_status = gr.Markdown("No corpus loaded.")  # initial status text
+                                    p1_corpus_preview = gr.Dataframe(  # no headers given: columns presumably follow the loaded CSV — TODO confirm
+                                        label="Corpus preview",
+                                        interactive=False,  # preview only, not editable
+                                        wrap=True,
+                                    )
+
+                                    # ---- Step 1 — Familiarization Facilitator: one instructions box to copy out, three boxes to paste results into ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Step 1 — Familiarization Facilitator")
+                                    gr.Markdown(
+                                        "An active-reading dialogue partner grounded in your "
+                                        "corpus via NotebookLM. Copy the instructions below, "
+                                        "create a Gem in Gemini with your NotebookLM notebook "
+                                        "attached under Knowledge, engage in the active-reading "
+                                        "dialogue, then paste your outputs here."
+                                    )
+                                    p1_facilitator_instructions = gr.Textbox(
+                                        label="Familiarization Facilitator instructions (paste into Gemini Gem)",
+                                        value="(instructions will be drafted in next round)",  # placeholder: the real Gem instructions are not written yet
+                                        lines=8,
+                                        max_lines=20,  # box starts at 8 rows (lines) and may grow to 20
+                                    )
+                                    p1_facilitator_memo = gr.Textbox(
+                                        label="Paste: Familiarization notes (Braun & Clarke 2006, Phase 1 output)",
+                                        lines=4,
+                                    )
+                                    p1_facilitator_transcript = gr.Textbox(
+                                        label="Paste: Full active-reading dialogue transcript",
+                                        lines=6,
+                                    )
+                                    p1_facilitator_citations = gr.Textbox(
+                                        label="Paste: Source evidence — quoted sentences anchoring each initial noticing",
+                                        lines=4,
+                                        info="One citation per line. Format: doc_id | section | sentence",  # format is only advertised here; any parsing lives outside this view
+                                    )
+
+                                    # ---- Step 2 — Reflexive Companion: one instructions box to copy out, three boxes to paste results into ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Step 2 — Reflexive Companion")
+                                    gr.Markdown(
+                                        "A critical dialogue partner that challenges your initial "
+                                        "noticings, probes your reflexive positioning, and verifies "
+                                        "immersion coverage across all sources. Run this after the "
+                                        "Facilitator dialogue is complete."
+                                    )
+                                    p1_companion_instructions = gr.Textbox(
+                                        label="Reflexive Companion instructions (paste into Gemini Gem)",
+                                        value="(instructions will be drafted in next round)",  # placeholder: the real Gem instructions are not written yet
+                                        lines=8,
+                                        max_lines=20,  # box starts at 8 rows (lines) and may grow to 20
+                                    )
+                                    p1_companion_challenges = gr.Textbox(
+                                        label="Paste: Reflexive challenges raised by Companion",
+                                        lines=4,
+                                    )
+                                    p1_companion_reflexivity = gr.Textbox(
+                                        label="Paste: Reflexive positioning statement",
+                                        lines=4,
+                                        info="Your position as researcher — assumptions, theoretical lens, relationship to the data.",
+                                    )
+                                    p1_companion_breadth = gr.Textbox(
+                                        label="Paste: Dataset immersion coverage notes",
+                                        lines=3,
+                                        info="Which sources and sections were engaged with, which remain unread.",
+                                    )
+
+                                    # ---- Step 3 — Researcher Confirmation: build button + editable table ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Step 3 — Researcher Confirmation")
+                                    gr.Markdown(
+                                        "Review each initial noticing against its source sentence. "
+                                        "Confirm, refine, or reject each one. This is the researcher's "
+                                        "own analytic act — not the dialogue partner's. Braun & Clarke "
+                                        "2019/2021 insist that reflexive thematic analysis is *constructed* "
+                                        "by the researcher's engagement with the data, not *extracted* by a tool."
+                                    )
+                                    p1_build_table_btn = gr.Button(
+                                        "Build confirmation table from Steps 1 + 2",
+                                        variant="secondary",
+                                    )
+                                    p1_validation_table = gr.Dataframe(
+                                        headers=[
+                                            "doc_id", "doc_title", "section", "sub_section",  # first five headers repeat the canonical CSV columns
+                                            "sentence", "initial_noticing",
+                                            "reflexive_challenge", "researcher_confirmation",
+                                            "refined_noticing",  # the four headers after "sentence" are the ones the label asks the researcher to edit
+                                        ],
+                                        label="Phase 1 Researcher Confirmation Table — edit the last 4 columns",
+                                        interactive=True,  # cells are editable in the UI
+                                        wrap=True,
+                                    )
+
+                                    # ---- Save: button plus a status line that starts empty ----
+                                    gr.Markdown("---")
+                                    p1_save_btn = gr.Button(
+                                        "Save Phase 1 output (all 3 steps → JSON artifact)",
+                                        variant="primary",
+                                    )
+                                    p1_save_status = gr.Markdown("")  # starts empty; presumably filled by the save handler — wiring is outside this view
+
+                                # ------------ Phase 2 — Initial Coding ------------
+                                with gr.Tab("Phase 2 — Initial Coding"):
+                                    gr.Markdown(
+                                        "## Phase 2 — Generating Initial Codes\n\n"
+                                        "*Braun & Clarke 2006, Phase 2: \"Coding interesting features "
+                                        "of the data in a systematic fashion across the entire data "
+                                        "set, collating data relevant to each code\"* (p. 87).\n\n"
+                                        "This workbench implements Phase 2 through a **fully agentic "
+                                        "LangGraph architecture**. The agent loops systematically "
+                                        "across every sentence, generates both semantic and latent "
+                                        "codes, maintains a growing codebook with definitions, and "
+                                        "iterates with researcher-edited context. The researcher is "
+                                        "the final authority — human code columns always override AI.\n\n"
+                                        "**Architecture:** LangGraph supervisor + 7 agent tools "
+                                        "(read_corpus, read_phase1_context, propose_code, "
+                                        "check_codebook, add_to_codebook, flag_for_review, "
+                                        "save_iteration). Agent decides ordering, flags ambiguous "
+                                        "sentences, and avoids codebook duplication.\n\n"
+                                        "**Braun & Clarke 2006 compliance target:** ~88% with full "
+                                        "agent + 3 iterations + researcher review. Unclosable gaps: "
+                                        "reflexive engagement depth, time-on-task verification, felt "
+                                        "sense of codes (documented in COMPLIANCE.md).\n\n"
+                                        "**Round 2 status (this release):** Real LangGraph agent wired. "
+                                        "Click Run iteration 1 to invoke Mistral through the 7-tool "
+                                        "supervisor loop. Runtime: ~60-120 seconds for 30 sentences. "
+                                        "Iteration 2 reads researcher edits from iteration 1. "
+                                        "Iteration 3 is the final convergence pass."
+                                    )
+
+                                    # ---- Corpus source ----
+                                    gr.Markdown("### Corpus — inherited from Phase 1")
+                                    gr.Markdown(
+                                        "*Phase 2 reads the canonical corpus loaded in Phase 1. "
+                                        "If no corpus is loaded, go to Phase 1 → Familiarization "
+                                        "and load test_phase1.csv or your own canonical CSV first.*"
+                                    )
+                                    p2_corpus_status = gr.Markdown("No corpus loaded. Load in Phase 1 first.")
+                                    p2_refresh_btn = gr.Button(
+                                        "Refresh corpus status from Phase 1",
+                                        variant="secondary",
+                                    )
+
+                                    # ---- Phase 1 context consumption ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Phase 1 context (consumed by the agent)")
+                                    gr.Markdown(
+                                        "*The Phase 2 agent reads the researcher's reflexive "
+                                        "positioning and confirmed initial noticings from Phase 1 "
+                                        "as context. This ensures Phase 2 coding is grounded in "
+                                        "the researcher's familiarization, not starting from scratch.*"
+                                    )
+                                    p2_phase1_summary = gr.Markdown(
+                                        "*Phase 1 output will appear here after Save Phase 1.*"
+                                    )
+
+                                    # ---- Orientation — Braun & Clarke p. 84 ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Coding orientation (Braun & Clarke p. 84)")
+                                    gr.Markdown(
+                                        "*Braun & Clarke 2006 (p. 84) treat SEMANTIC vs LATENT as "
+                                        "an analysis-wide choice, not a per-sentence distinction. "
+                                        "Choose ONE orientation for this whole analysis. The agent "
+                                        "will code every sentence at the level you pick.*  \n\n"
+                                        "**Semantic** — surface content, what the text explicitly says  \n"
+                                        "**Latent** — underlying assumptions, what the text implies"
+                                    )
+                                    p2_orientation = gr.Radio(
+                                        choices=["semantic", "latent"],
+                                        value="semantic",
+                                        label="Coding orientation for this analysis",
+                                        interactive=True,
+                                    )
+
+                                    # ---- Iteration controls ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Agentic coding iterations")
+                                    gr.Markdown(
+                                        "Braun & Clarke insist on iterative refinement. Run "
+                                        "iteration 1 → review AI codes in the table → edit human "
+                                        "columns → run iteration 2 (agent reads your edits as "
+                                        "context) → review → iteration 3 → converge."
+                                    )
+                                    with gr.Row():
+                                        p2_run_iter1_btn = gr.Button(
+                                            "Run iteration 1",
+                                            variant="primary",
+                                        )
+                                        p2_run_iter2_btn = gr.Button(
+                                            "Run iteration 2 (reads your edits)",
+                                            variant="secondary",
+                                        )
+                                        p2_run_iter3_btn = gr.Button(
+                                            "Run iteration 3 (final)",
+                                            variant="secondary",
+                                        )
+                                    p2_iter_status = gr.Markdown("*No iterations run yet.*")
+
+                                    # ---- Coding table ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Initial Codes Table")
+                                    gr.Markdown(
+                                        "*Every sentence gets two code levels (semantic + latent) "
+                                        "per iteration. Edit the `human_code_iterN` columns to "
+                                        "override the agent. The `final_code` column is populated "
+                                        "from the latest human edit or the latest AI code if no "
+                                        "human edit exists.*"
+                                    )
+                                    p2_codes_table = gr.Dataframe(
+                                        headers=[
+                                            "doc_id", "doc_title", "section", "sub_section", "sentence",
+                                            "ai_code_iter1", "human_code_iter1",
+                                            "ai_code_iter2", "human_code_iter2",
+                                            "ai_code_iter3", "human_code_iter3",
+                                            "final_code", "flagged",
+                                        ],
+                                        label="Phase 2 Initial Codes — edit human_code_iterN columns",
+                                        interactive=True,
+                                        wrap=True,
+                                    )
+
+                                    # ---- Codebook ----
+                                    gr.Markdown("---")
+                                    gr.Markdown("### Codebook")
+                                    gr.Markdown(
+                                        "*Braun & Clarke 2006 require a codebook: the dictionary "
+                                        "of codes with definitions, provenance, and usage counts. "
+                                        "The agent maintains this as it codes; the researcher can "
+                                        "edit definitions directly.*"
+                                    )
+                                    p2_codebook_table = gr.Dataframe(
+                                        headers=[
+                                            "code_name", "definition", "created_by",
+                                            "provenance", "sentence_count",
+                                        ],
+                                        label="Phase 2 Codebook — edit definitions",
+                                        interactive=True,
+                                        wrap=True,
+                                    )
+
+                                    # ---- Save ----
+                                    gr.Markdown("---")
+                                    p2_save_btn = gr.Button(
+                                        "Save Phase 2 output (codes + codebook → JSON artifact)",
+                                        variant="primary",
+                                    )
+                                    p2_save_status = gr.Markdown("")
+
+
+
+
+
+                                # ------------ Phase 3 -- Searching for Themes ------------
+                                with gr.Tab("Phase 3 -- Searching for Themes"):
+                                    gr.Markdown(
+                                        "## Phase 3 -- Searching for Themes\n\n"
+                                        "*Braun & Clarke 2006, Phase 3: \"Collating codes into potential "
+                                        "themes, gathering all data relevant to each potential theme\" (p. 89).*\n\n"
+                                        "This phase clusters the Phase 2 codebook codes by semantic similarity "
+                                        "(sentence-transformers embeddings + agglomerative clustering), then "
+                                        "proposes a candidate theme name and description for each cluster "
+                                        "via one Mistral call per cluster.\n\n"
+                                        "**Researcher action:** review the candidate themes, edit "
+                                        "`researcher_theme_name` and `researcher_notes` columns, then "
+                                        "re-run with different thresholds if needed. B&C 2006 explicitly "
+                                        "say Phase 3 is tentative and iterative."
+                                    )
+
+                                    gr.Markdown("### Clustering parameters (researcher-controlled)")
+                                    gr.Markdown(
+                                        "*B&C 2006 do not prescribe a fixed number of themes. "
+                                        "Themes emerge from the clustering threshold you set. "
+                                        "Lower similarity = fewer, broader themes. "
+                                        "Higher similarity = more, tighter themes.*"
+                                    )
+                                    with gr.Row():  # both sliders share one gr.Row
+                                        p3_similarity = gr.Slider(
+                                            minimum=0.3, maximum=0.95, value=0.60, step=0.05,
+                                            label="Similarity threshold",
+                                            info="Codes more similar than this cluster together. Default 0.60.",
+                                        )
+                                        p3_min_size = gr.Slider(
+                                            minimum=2, maximum=10, value=2, step=1,
+                                            label="Minimum cluster size",
+                                            info="Clusters smaller than this go into noise bucket. Default 2.",
+                                        )
+                                    # Run button plus a status line; both sliders above are inputs of its click handler.
+                                    p3_run_btn = gr.Button(
+                                        "Run Phase 3 -- Cluster codes into candidate themes",
+                                        variant="primary",
+                                    )
+                                    p3_status = gr.Markdown("*No themes generated yet. Run Phase 2 first.*")
+                                    # Editable table (interactive=True): output of the run handler, input of the save handler.
+                                    gr.Markdown("---")
+                                    gr.Markdown(
+                                        "### Candidate Themes Table\n"
+                                        "*Edit `researcher_theme_name` and `researcher_notes` to override "
+                                        "or refine the AI-generated theme names. Researcher is the final "
+                                        "authority (Braun & Clarke 2006, reflexive TA principle).*"
+                                    )
+                                    p3_themes_table = gr.Dataframe(
+                                        headers=[
+                                            "theme_id", "candidate_theme_name", "description",
+                                            "rationale", "member_codes", "code_count",
+                                            "researcher_theme_name", "researcher_notes",
+                                        ],
+                                        label="Phase 3 Candidate Themes -- edit researcher_theme_name and researcher_notes",
+                                        interactive=True,
+                                        wrap=True,
+                                    )
+                                    # Read-only table (interactive=False) for the codes that did not cluster.
+                                    gr.Markdown("---")
+                                    gr.Markdown(
+                                        "### Noise Codes\n"
+                                        "*Codes that did not fit any cluster (below minimum cluster size). "
+                                        "Review these -- they may represent important edge cases or require "
+                                        "lower similarity threshold to be absorbed.*"
+                                    )
+                                    p3_noise_table = gr.Dataframe(
+                                        headers=["code_name", "definition"],
+                                        label="Noise codes (did not cluster)",
+                                        interactive=False,
+                                        wrap=True,
+                                    )
+                                    # Save button; its handler writes a message into p3_save_status (initially empty).
+                                    gr.Markdown("---")
+                                    p3_save_btn = gr.Button(
+                                        "Save Phase 3 output (themes + noise -> JSON artifact)",
+                                        variant="secondary",
+                                    )
+                                    p3_save_status = gr.Markdown("")
+
+    # ========================================================================
+    # ZONE 5 — Event wiring (.click handlers — the glue)
+    # ========================================================================
+    # Each event call (.click(), .submit(), .change(), .upload()) connects a
+    # component event to a handler function. The function's return values go
+    # into the components listed in outputs=[...].
+    #
+    # GOLDEN RULE: the number of return values from the handler must match
+    # the length of the outputs list, in the same order.
+    #
+    # chat_outputs is the shared list used by process_message, submit_form,
+    # and new_chat. All three must return 8 values in the same order, one per
+    # entry of chat_outputs.
+    # ------------------------------------------------------------------------
+    # Components rewritten after every chat turn. process_message, submit_form
+    # and new_chat must each return one value per entry, in this exact order.
+    chat_outputs = [
+        chatbot,
+        table_out,
+        extracted_out,
+        chart_out,
+        code_out,
+        downloads_state,
+        downloads_files_out,
+        chat_input,
+    ]
+
+    # The Send button's click and chat_input's submit event run the same
+    # handler with the same components, so both are registered from one loop.
+    for _register_chat_turn in (send_btn.click, chat_input.submit):
+        _register_chat_turn(
+            fn=process_message,
+            inputs=[chat_input, mode_select, llm_provider_select, llm_key_input,
+                    chatbot, loaded_context_state, downloads_state],
+            outputs=chat_outputs,
+        )
+
+    # Structured form: six form fields in place of chat_input, same other inputs.
+    form_submit.click(
+        fn=submit_form,
+        inputs=[form_task, form_op, form_a, form_b, form_city, form_notes,
+                mode_select, llm_provider_select, llm_key_input, chatbot,
+                loaded_context_state, downloads_state],
+        outputs=chat_outputs,
+    )
+    # clear_form takes no inputs and must return one value per form field.
+    form_clear.click(
+        fn=clear_form,
+        outputs=[form_task, form_op, form_a, form_b, form_city, form_notes],
+    )
+    # new_chat receives only the downloads state and rewrites every chat output.
+    new_chat_btn.click(fn=new_chat, inputs=[downloads_state], outputs=chat_outputs)
+
+    # Data source handlers: URL scrape, then PDF, spreadsheet and ML examples.
+    scrape_btn.click(
+        fn=scrape_url,
+        inputs=[url_input, downloads_state],
+        outputs=[scrape_preview, scrape_status,
+                 loaded_context_state, downloads_state, downloads_files_out],
+    )
+    scrape_clear_btn.click(
+        fn=clear_scrape,
+        outputs=[url_input, scrape_preview, scrape_status, loaded_context_state],
+    )
+    # PDF: same wiring shape as the scraper, keyed on pdf_input.
+    pdf_extract_btn.click(
+        fn=extract_pdf,
+        inputs=[pdf_input, downloads_state],
+        outputs=[pdf_preview, pdf_status,
+                 loaded_context_state, downloads_state, downloads_files_out],
+    )
+    pdf_clear_btn.click(
+        fn=clear_pdf,
+        outputs=[pdf_input, pdf_preview, pdf_status, loaded_context_state],
+    )
+    # Spreadsheet: same wiring shape again, keyed on csv_input.
+    csv_load_btn.click(
+        fn=load_spreadsheet,
+        inputs=[csv_input, downloads_state],
+        outputs=[csv_preview, csv_status,
+                 loaded_context_state, downloads_state, downloads_files_out],
+    )
+    csv_clear_btn.click(
+        fn=clear_spreadsheet,
+        outputs=[csv_input, csv_preview, csv_status, loaded_context_state],
+    )
+    # ML examples: no input widget, so the clear handler has three outputs.
+    ml_load_btn.click(
+        fn=load_ml_examples,
+        inputs=[downloads_state],
+        outputs=[ml_preview, ml_status,
+                 loaded_context_state, downloads_state, downloads_files_out],
+    )
+    ml_clear_btn.click(
+        fn=clear_ml_examples,
+        outputs=[ml_preview, ml_status, loaded_context_state],
+    )
+
+    # Training handlers (supervised).
+    # trained_state is written by handle_train and read back by handle_predict.
+    train_btn.click(
+        fn=handle_train,
+        inputs=[downloads_state],
+        outputs=[trained_state, train_status, confusion_out,
+                 downloads_state, downloads_files_out],
+    )
+    train_clear_btn.click(
+        fn=clear_training,
+        outputs=[trained_state, train_status, confusion_out, predict_out],
+    )
+    predict_btn.click(
+        fn=handle_predict,
+        inputs=[trained_state, predict_input, downloads_state],
+        outputs=[predict_out, downloads_state, downloads_files_out],
+    )
+    # Wired to .change rather than .click: the filter component is its own input.
+    sup_label_filter.change(
+        fn=filter_training_dataset,
+        inputs=[sup_label_filter],
+        outputs=[sup_dataset_view],
+    )
+
+    # Training handlers (unsupervised).
+    cluster_btn.click(
+        fn=handle_cluster,
+        inputs=[cluster_sim, cluster_min, cluster_nnear, cluster_llm_toggle,
+                llm_provider_select, llm_key_input, downloads_state],
+        outputs=[cluster_out, cluster_status,
+                 downloads_state, downloads_files_out],
+    )
+    cluster_clear_btn.click(fn=clear_clustering, outputs=[cluster_out, cluster_status])
+
+    # ---- Vector Processing wiring ----
+    # Every handler except the preview clear also writes the downloads pair.
+    vectorize_btn.click(
+        fn=handle_vectorize_preview,
+        inputs=[embedding_provider_select, embedding_key_input, downloads_state],
+        outputs=[vectorize_out, vectorize_status,
+                 downloads_state, downloads_files_out],
+    )
+    vectorize_clear_btn.click(
+        fn=clear_vectorize_preview, outputs=[vectorize_out, vectorize_status],
+    )
+    vector_index_btn.click(
+        fn=handle_vector_index,
+        inputs=[embedding_provider_select, embedding_key_input, downloads_state],
+        outputs=[vector_index_status, downloads_state, downloads_files_out],
+    )
+    vector_clear_btn.click(
+        fn=handle_vector_clear,
+        inputs=[downloads_state],
+        outputs=[vector_index_status, downloads_state, downloads_files_out],
+    )
+    vector_search_btn.click(
+        fn=handle_vector_search,
+        inputs=[vector_query, vector_n, embedding_provider_select,
+                embedding_key_input, downloads_state],
+        outputs=[vector_search_out, vector_search_status,
+                 downloads_state, downloads_files_out],
+    )
+
+    # ---- Workbench wiring ----
+    # Both runners output a trace, a reply and one result component, then the
+    # downloads pair; both read the shared LLM settings and loaded context.
+    wb_cgt_run.click(
+        fn=handle_wb_cgt,
+        inputs=[wb_cgt_msg, wb_cgt_sim, wb_cgt_min, wb_cgt_nnear,
+                llm_provider_select, llm_key_input, loaded_context_state, downloads_state],
+        outputs=[wb_cgt_trace, wb_cgt_reply, wb_cgt_sentences,
+                 downloads_state, downloads_files_out],
+    )
+    wb_cta_run.click(
+        fn=handle_wb_cta,
+        inputs=[wb_cta_msg, wb_cta_max,
+                llm_provider_select, llm_key_input, loaded_context_state, downloads_state],
+        outputs=[wb_cta_trace, wb_cta_reply, wb_cta_codes,
+                 downloads_state, downloads_files_out],
+    )
+
+    # ---- Phase 1 Familiarization wiring ----
+    # Both corpus loaders (test-CSV button, CSV upload) fill the same outputs.
+    p1_load_test_btn.click(
+        fn=handle_p1_load_test_csv,
+        inputs=[downloads_state],
+        outputs=[p1_corpus_state, p1_corpus_status, p1_corpus_preview,
+                 downloads_state, downloads_files_out],
+    )
+    p1_upload_csv.upload(
+        fn=handle_p1_upload_csv,
+        inputs=[p1_upload_csv, downloads_state],
+        outputs=[p1_corpus_state, p1_corpus_status, p1_corpus_preview,
+                 downloads_state, downloads_files_out],
+    )
+    p1_build_table_btn.click(
+        fn=handle_p1_build_validation_table,
+        inputs=[p1_corpus_state, p1_facilitator_memo, p1_facilitator_transcript,
+                p1_facilitator_citations, p1_companion_challenges,
+                p1_companion_reflexivity, p1_companion_breadth],
+        outputs=[p1_validation_table],
+    )
+    p1_save_btn.click(
+        fn=handle_p1_save,
+        inputs=[p1_corpus_state, p1_facilitator_memo, p1_facilitator_transcript,
+                p1_facilitator_citations, p1_companion_challenges,
+                p1_companion_reflexivity, p1_companion_breadth,
+                p1_validation_table, downloads_state],
+        outputs=[p1_save_status, downloads_state, downloads_files_out],
+    )
+
+    # ---- Phase 2 Initial Coding wiring ----
+    p2_refresh_btn.click(
+        handle_p2_refresh_corpus,
+        inputs=[p1_corpus_state,
+                p1_facilitator_memo, p1_companion_reflexivity, p1_validation_table],
+        outputs=[p2_corpus_status, p2_phase1_summary],
+    )
+
+    def _p2_iteration_handler(iteration):
+        """Return the click handler for Phase 2 coding pass number `iteration`.
+
+        The handler forwards the nine component values wired below, in order,
+        to handle_p2_run_iteration after the iteration number. The factory
+        binds `iteration` per button (no late-binding closure); a lambda is
+        still returned so the handler's __name__ stays "<lambda>" as before.
+        """
+        return lambda corpus, codes, codebook, memo, reflex, vtable, prov, key, orient: (
+            handle_p2_run_iteration(
+                iteration, corpus, codes, codebook, memo, reflex, vtable, prov, key, orient))
+
+    # The three iteration buttons differ only in the iteration number; wiring
+    # them from one definition keeps their inputs and outputs from drifting.
+    for _p2_iter_no, _p2_iter_btn in enumerate(
+            (p2_run_iter1_btn, p2_run_iter2_btn, p2_run_iter3_btn), start=1):
+        _p2_iter_btn.click(
+            _p2_iteration_handler(_p2_iter_no),
+            inputs=[p1_corpus_state, p2_codes_table, p2_codebook_table,
+                    p1_facilitator_memo, p1_companion_reflexivity, p1_validation_table,
+                    llm_provider_select, llm_key_input, p2_orientation],
+            outputs=[p2_codes_table, p2_codebook_table, p2_iter_status],
+        )
+    p2_save_btn.click(
+        handle_p2_save,
+        inputs=[p1_corpus_state, p2_codes_table, p2_codebook_table, downloads_state],
+        outputs=[p2_save_status, downloads_state, downloads_files_out],
+    )
+
+
+    # ---- Phase 3 Searching for Themes wiring ----
+    # Run: reads the Phase 2 codebook table, the two Phase 3 sliders, the
+    # orientation and reflexivity fields, the LLM settings and downloads state.
+    p3_run_btn.click(
+        fn=handle_p3_run,
+        inputs=[p2_codebook_table, p3_similarity, p3_min_size, p2_orientation,
+                p1_companion_reflexivity, llm_provider_select, llm_key_input,
+                downloads_state],
+        outputs=[p3_themes_table, p3_noise_table, p3_status,
+                 downloads_state, downloads_files_out],
+    )
+    # Save: takes the current contents of both Phase 3 tables.
+    p3_save_btn.click(
+        fn=handle_p3_save,
+        inputs=[p3_themes_table, p3_noise_table, downloads_state],
+        outputs=[p3_save_status, downloads_state, downloads_files_out],
+    )
+
+
+if __name__ == "__main__":  # launch only when executed as a script, not when imported
+    # ssr_mode=False: Gradio 5/6's Server-Side Rendering breaks demo.launch()
+    # on HuggingFace Spaces with the "localhost not accessible" error.
+    # Confirmed workaround from HF forums + Gradio Discord.
+    demo.launch(ssr_mode=False)