Spaces:

neovalle
/

SpatialDiscourseAnalysis

Sleeping

App Files Files Community

neovalle commited on Apr 16

Commit

f5be592

verified ·

1 Parent(s): e8fdf77

Upload app.py

Browse files

Files changed (1) hide show

app.py +450 -522

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
 """
-Discourse Compass — Gradio App for Linguists & General Public
 =============================================================
-• Interactive 3D Plotly scatter (rotate, zoom, pan)
-• Custom naming for poles and discourses
-• Plain-language results for non-technical users
-• Sentence embeddings via all-mpnet-base-v2 (768-dim)
 """
 import gradio as gr
@@ -12,11 +11,9 @@ import numpy as np
 import plotly.graph_objects as go
 from sentence_transformers import SentenceTransformer
 from sklearn.decomposition import PCA
-from scipy.spatial.distance import cosine, euclidean
 # ── Model ─────────────────────────────────────────────────────────────────────
 MODEL_NAME = "all-mpnet-base-v2"
-MODEL_DIM = 768
 _model = None
 def get_model():
@@ -25,7 +22,7 @@ def get_model():
         _model = SentenceTransformer(MODEL_NAME)
     return _model
-# ── Maths helpers ─────────────────────────────────────────────────────────────
 def parse_sentences(text):
     return [s.strip() for s in text.strip().splitlines() if s.strip()]
@@ -37,448 +34,386 @@ def angle_between(u, v):
     c = abs(float(np.dot(unit(u), unit(v))))
     return float(np.degrees(np.arccos(min(c, 1.0))))
-def thematic_breadth(vecs):
     return float(np.linalg.norm(vecs - vecs.mean(axis=0), "fro"))
-def principal_axis(vecs):
     if vecs.shape[0] < 2:
-        return np.zeros(vecs.shape[1]), np.eye(vecs.shape[1])
-    vals, evecs = np.linalg.eigh(np.cov(vecs, rowvar=False))
-    order = np.argsort(vals)[::-1]
-    return vals[order], evecs[:, order]
-def semantic_heart(vecs):
-    return vecs.mean(axis=0)
-# ── Plain-language interpretation helpers ─────────────────────────────────────
-def breadth_label(score, all_scores):
-    mn, mx = min(all_scores), max(all_scores)
-    if mx == mn:
-        return "moderate"
-    r = (score - mn) / (mx - mn)
-    if r < 0.33:
-        return "tightly focused"
-    if r < 0.66:
-        return "moderately varied"
-    return "wide-ranging"
-def orientation_label(angle):
-    if angle < 20:
-        return "closely tracks the pole-to-pole spectrum"
-    if angle < 45:
-        return "partly follows the pole-to-pole spectrum"
-    if angle < 70:
-        return "drifts away from the pole-to-pole spectrum"
-    return "varies independently of the pole-to-pole spectrum"
-def strength_label(pct):
-    if pct > 0.6:
-        return "very consistent — sentences cluster in one direction"
-    if pct > 0.35:
-        return "moderately consistent"
-    return "diverse — sentences spread in many directions"
-def pull_label(cos_a, cos_b, name_a, name_b):
-    diff = abs(cos_a - cos_b)
-    closer = name_a if cos_a < cos_b else name_b
-    if diff < 0.05:
-        return f"sits roughly halfway between {name_a} and {name_b}"
-    elif diff < 0.15:
-        return f"leans toward {closer}"
     else:
-        return f"clearly closer to {closer}"
-# ── Plotly colour palette ─────────────────────────────────────────────────────
-COLORS = {
-    "A": "#5aa8ff",
-    "B": "#ff6b6b",
-    "D1": "#3dd6a3",
-    "D2": "#ffcc55",
-}
-BG_COLOR = "#0d0f1c"
-GRID_COLOR = "#1c2040"
-TEXT_COLOR = "#cdd5f0"
-# ── Interactive Plotly 3D renderer ────────────────────────────────────────────
-def build_plotly_figure(
-    pts_a, pts_b, pts_d1, pts_d2,
-    c_a, c_b, c_d1, c_d2,
-    ev_a, ev_b, ev_d1, ev_d2,
-    pca_ev,
-    name_a, name_b, name_d1, name_d2,
-):
-    fig = go.Figure()
-    # ── Sentence dots ─────────────────────────────────────────────────────
-    for pts, key, name, symbol in [
-        (pts_a, "A", name_a, "circle"),
-        (pts_b, "B", name_b, "circle"),
-        (pts_d1, "D1", name_d1, "square"),
-        (pts_d2, "D2", name_d2, "square"),
-    ]:
-        fig.add_trace(go.Scatter3d(
-            x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
             mode="markers",
-            marker=dict(size=5, color=COLORS[key], symbol=symbol,
-                        opacity=0.7, line=dict(width=0.5, color="white")),
-            name=f"{name} sentences",
-            legendgroup=key,
-            hovertemplate=f"{name} sentence<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
-        ))
-    # ── Centroids (diamonds) ──────────────────────────────────────────────
-    for c3, key, name in [
-        (c_a, "A", name_a),
-        (c_b, "B", name_b),
-        (c_d1, "D1", name_d1),
-        (c_d2, "D2", name_d2),
-    ]:
-        fig.add_trace(go.Scatter3d(
-            x=[c3[0]], y=[c3[1]], z=[c3[2]],
             mode="markers+text",
-            marker=dict(size=10, color=COLORS[key], symbol="diamond",
-                        line=dict(width=2, color="white")),
-            text=[f"◆ {name}"],
             textposition="top center",
-            textfont=dict(color=COLORS[key], size=11),
-            name=f"◆ Centre of {name}",
-            legendgroup=key,
-            showlegend=True,
-            hovertemplate=f"Centre of {name}<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
-        ))
-    # ── Pole axis (dashed line A↔B) ───────────────────────────────────────
-    fig.add_trace(go.Scatter3d(
-        x=[c_a[0], c_b[0]], y=[c_a[1], c_b[1]], z=[c_a[2], c_b[2]],
-        mode="lines",
-        line=dict(color="white", width=3, dash="dash"),
-        name=f"Spectrum: {name_a} ↔ {name_b}",
-        opacity=0.5,
-        hoverinfo="skip",
-    ))
-    # ── Spokes: discourse centres → pole centres ──────────────────────────
-    for c_disc, key, dname in [(c_d1, "D1", name_d1), (c_d2, "D2", name_d2)]:
-        for pole_pt, pname in [(c_a, name_a), (c_b, name_b)]:
-            fig.add_trace(go.Scatter3d(
-                x=[c_disc[0], pole_pt[0]],
-                y=[c_disc[1], pole_pt[1]],
-                z=[c_disc[2], pole_pt[2]],
-                mode="lines",
-                line=dict(color=COLORS[key], width=1.5, dash="dot"),
-                opacity=0.4,
-                showlegend=False,
-                hoverinfo="skip",
-            ))
-    # ── Principal direction arrows ────────────────────────────────────────
-    scale = 0.15
-    for c3, ev3, key, name in [
-        (c_a, ev_a, "A", name_a),
-        (c_b, ev_b, "B", name_b),
-        (c_d1, ev_d1, "D1", name_d1),
-        (c_d2, ev_d2, "D2", name_d2),
-    ]:
-        tip = c3 + ev3 * scale
-        tail = c3 - ev3 * scale
-        fig.add_trace(go.Scatter3d(
-            x=[tail[0], tip[0]], y=[tail[1], tip[1]], z=[tail[2], tip[2]],
-            mode="lines",
-            line=dict(color=COLORS[key], width=6),
             showlegend=False,
-            hovertemplate=f"Direction of variation — {name}<extra></extra>",
-        ))
-        # arrowhead
-        fig.add_trace(go.Scatter3d(
-            x=[tip[0]], y=[tip[1]], z=[tip[2]],
-            mode="markers",
-            marker=dict(size=5, color=COLORS[key], symbol="diamond"),
             showlegend=False,
-            hoverinfo="skip",
-        ))
-    # ── Layout ────────────────────────────────────────────────────────────
-    axis_template = dict(
-        backgroundcolor=BG_COLOR,
-        gridcolor=GRID_COLOR,
-        showbackground=True,
-        color=TEXT_COLOR,
-        tickfont=dict(size=9, color=TEXT_COLOR),
-    )
     fig.update_layout(
-        scene=dict(
-            xaxis=dict(title=f"Meaning Axis 1 ({pca_ev[0]:.0%})", **axis_template),
-            yaxis=dict(title=f"Meaning Axis 2 ({pca_ev[1]:.0%})", **axis_template),
-            zaxis=dict(title=f"Meaning Axis 3 ({pca_ev[2]:.0%})", **axis_template),
-        ),
-        paper_bgcolor=BG_COLOR,
-        plot_bgcolor=BG_COLOR,
-        font=dict(color=TEXT_COLOR),
         title=dict(
-            text=(
-                f"Discourse Compass — {name_a} vs {name_b}<br>"
-                f"<span style='font-size:12px;color:#5a6488;'>"
-                f"Drag to rotate · Scroll to zoom · {sum(pca_ev):.0%} of meaning variation shown</span>"
-            ),
-            x=0.5,
-            font=dict(size=16),
         ),
-        legend=dict(
-            bgcolor="rgba(19,22,42,0.9)",
-            bordercolor=GRID_COLOR,
-            borderwidth=1,
-            font=dict(size=10, color=TEXT_COLOR),
         ),
-        margin=dict(l=0, r=0, t=60, b=0),
-        height=620,
     )
-    return fig
-# ── Core analysis ─────────────────────────────────────────────────────────────
-def run_analysis(text_a, text_b, text_d1, text_d2,
-                 name_a, name_b, name_d1, name_d2):
-    # Default names if blank
-    name_a = name_a.strip() or "Pole A"
-    name_b = name_b.strip() or "Pole B"
-    name_d1 = name_d1.strip() or "Discourse 1"
-    name_d2 = name_d2.strip() or "Discourse 2"
-    sents_a = parse_sentences(text_a)
-    sents_b = parse_sentences(text_b)
-    sents_d1 = parse_sentences(text_d1)
-    sents_d2 = parse_sentences(text_d2)
-    errors = []
-    if not sents_a:
-        errors.append(f"{name_a} needs at least 1 sentence.")
-    if not sents_b:
-        errors.append(f"{name_b} needs at least 1 sentence.")
-    if not sents_d1:
-        errors.append(f"{name_d1} needs at least 1 sentence.")
-    if not sents_d2:
-        errors.append(f"{name_d2} needs at least 1 sentence.")
-    if errors:
-        return "⚠  " + "  |  ".join(errors), None
-    model = get_model()
-    all_sents = sents_a + sents_b + sents_d1 + sents_d2
-    all_vecs = model.encode(all_sents, normalize_embeddings=False,
-                            show_progress_bar=False)
-    na, nb, nd1, nd2 = len(sents_a), len(sents_b), len(sents_d1), len(sents_d2)
-    vecs_a = all_vecs[:na]
-    vecs_b = all_vecs[na:na + nb]
-    vecs_d1 = all_vecs[na + nb:na + nb + nd1]
-    vecs_d2 = all_vecs[na + nb + nd1:]
-    # Semantic Hearts (centroids)
-    heart_a = semantic_heart(vecs_a)
-    heart_b = semantic_heart(vecs_b)
-    heart_d1 = semantic_heart(vecs_d1)
-    heart_d2 = semantic_heart(vecs_d2)
-    # Thematic Breadth (spread)
-    bread_a = thematic_breadth(vecs_a)
-    bread_b = thematic_breadth(vecs_b)
-    bread_d1 = thematic_breadth(vecs_d1)
-    bread_d2 = thematic_breadth(vecs_d2)
-    all_breads = [bread_a, bread_b, bread_d1, bread_d2]
-    # Pole Orientation (eigenanalysis)
-    pole_vec = heart_b - heart_a
-    def cloud_eigen(vecs):
-        vals, evecs = principal_axis(vecs)
-        main = evecs[:, 0]
-        ang = angle_between(main, pole_vec)
-        exp = vals[0] / vals.sum() if vals.sum() > 1e-12 else 0.0
-        return main, ang, exp
-    ev_a, ang_a, exp_a = cloud_eigen(vecs_a)
-    ev_b, ang_b, exp_b = cloud_eigen(vecs_b)
-    ev_d1, ang_d1, exp_d1 = cloud_eigen(vecs_d1)
-    ev_d2, ang_d2, exp_d2 = cloud_eigen(vecs_d2)
-    # Centroid projection onto pole axis (scalar position)
-    pole_dir = unit(pole_vec)
-    proj_d1 = float(np.dot(heart_d1 - heart_a, pole_dir))
-    proj_d2 = float(np.dot(heart_d2 - heart_a, pole_dir))
-    pole_len = float(np.linalg.norm(pole_vec))
-    pct_d1 = proj_d1 / pole_len if pole_len > 1e-12 else 0.5
-    pct_d2 = proj_d2 / pole_len if pole_len > 1e-12 else 0.5
-    # PCA to 3D (visualisation only)
-    stack = np.vstack([all_vecs, heart_a, heart_b, heart_d1, heart_d2])
-    pca = PCA(n_components=3, random_state=42)
-    proj_3d = pca.fit_transform(stack)
-    pca_ev = pca.explained_variance_ratio_
-    n = len(all_sents)
-    pts_a_3d = proj_3d[:na]
-    pts_b_3d = proj_3d[na:na + nb]
-    pts_d1_3d = proj_3d[na + nb:na + nb + nd1]
-    pts_d2_3d = proj_3d[na + nb + nd1:n]
-    c_a_3d, c_b_3d = proj_3d[n], proj_3d[n + 1]
-    c_d1_3d, c_d2_3d = proj_3d[n + 2], proj_3d[n + 3]
-    # Rotate eigenvectors into 3D PCA space
-    ev_a_3d = unit(pca.components_ @ ev_a)
-    ev_b_3d = unit(pca.components_ @ ev_b)
-    ev_d1_3d = unit(pca.components_ @ ev_d1)
-    ev_d2_3d = unit(pca.components_ @ ev_d2)
-    # Build interactive Plotly figure
-    fig = build_plotly_figure(
-        pts_a_3d, pts_b_3d, pts_d1_3d, pts_d2_3d,
-        c_a_3d, c_b_3d, c_d1_3d, c_d2_3d,
-        ev_a_3d, ev_b_3d, ev_d1_3d, ev_d2_3d,
-        pca_ev,
-        name_a, name_b, name_d1, name_d2,
-    )
-    # ── Build plain-language report ───────────────────────────────────────
-    cos_d1_a = float(cosine(heart_d1, heart_a))
-    cos_d1_b = float(cosine(heart_d1, heart_b))
-    cos_d2_a = float(cosine(heart_d2, heart_a))
-    cos_d2_b = float(cosine(heart_d2, heart_b))
-    pole_cos = float(cosine(heart_a, heart_b))
-    if pole_cos > 0.4:
-        pole_sep_txt = "well separated — they represent clearly different viewpoints"
-    elif pole_cos > 0.2:
-        pole_sep_txt = "moderately separated"
-    else:
-        pole_sep_txt = "quite close — consider using more contrasting sentences"
-    def position_bar(pct, width=30):
-        """Visual bar showing where a discourse sits on the A↔B spectrum."""
-        pos = max(0, min(1, pct))
-        idx = int(round(pos * width))
-        return "│" + "─" * idx + "●" + "─" * (width - idx) + "│"
-    report_lines = [
-        f"{'═' * 62}",
-        f"  DISCOURSE COMPASS — Results",
-        f"{'═' * 62}",
-        f"",
-        f"  SETUP",
-        f"  ─────────────────────────────────────────────────────────",
-        f"  Pole:  {name_a}  ({na} sentences)",
-        f"  Pole:  {name_b}  ({nb} sentences)",
-        f"  Text:  {name_d1}  ({nd1} sentences)",
-        f"  Text:  {name_d2}  ({nd2} sentences)",
-        f"  Pole separation: {pole_sep_txt}",
-        f"",
-        f"",
-        f"  WHERE EACH TEXT SITS ON THE SPECTRUM",
-        f"  ─────────────────────────────────────────────────────────",
-        f"  Think of a line stretching from {name_a} on the left",
-        f"  to {name_b} on the right. Where does each text land?",
-        f"",
-        f"  {name_a:<20s}                              {name_b}",
-        f"  {name_d1}:",
-        f"  {position_bar(pct_d1)}  ({pct_d1:.0%} toward {name_b})",
-        f"  → {pull_label(cos_d1_a, cos_d1_b, name_a, name_b)}",
-        f"",
-        f"  {name_d2}:",
-        f"  {position_bar(pct_d2)}  ({pct_d2:.0%} toward {name_b})",
-        f"  → {pull_label(cos_d2_a, cos_d2_b, name_a, name_b)}",
-        f"",
-        f"",
-        f"  HOW SPREAD OUT IS EACH SET OF SENTENCES?",
-        f"  ─────────────────────────────────────────────────────────",
-        f"  Low = all sentences say roughly the same thing",
-        f"  High = sentences cover many different angles",
-        f"",
-        f"  {name_a:<22s}  {bread_a:.3f}  — {breadth_label(bread_a, all_breads)}",
-        f"  {name_b:<22s}  {bread_b:.3f}  — {breadth_label(bread_b, all_breads)}",
-        f"  {name_d1:<22s}  {bread_d1:.3f}  — {breadth_label(bread_d1, all_breads)}",
-        f"  {name_d2:<22s}  {bread_d2:.3f}  — {breadth_label(bread_d2, all_breads)}",
-        f"",
-        f"",
-        f"  WHAT DIRECTION DOES EACH TEXT VARY IN?",
-        f"  ─────────────────────────────────────────────────────────",
-        f"  This asks: when sentences in a group differ from each",
-        f"  other, do they differ along the {name_a}↔{name_b}",
-        f"  spectrum, or in some unrelated direction?",
-        f"",
-        f"  0°  = variation runs along the spectrum",
-        f"  90° = variation runs in a completely different direction",
-        f"",
-        f"  {name_a:<22s}  {ang_a:5.1f}°  — {orientation_label(ang_a)}",
-        f"  {name_b:<22s}  {ang_b:5.1f}°  — {orientation_label(ang_b)}",
-        f"  {name_d1:<22s}  {ang_d1:5.1f}°  — {orientation_label(ang_d1)}",
-        f"  {name_d2:<22s}  {ang_d2:5.1f}°  — {orientation_label(ang_d2)}",
-        f"",
-        f"  How consistent is each group?",
-        f"  {name_a:<22s}  {exp_a:.0%}  — {strength_label(exp_a)}",
-        f"  {name_b:<22s}  {exp_b:.0%}  — {strength_label(exp_b)}",
-        f"  {name_d1:<22s}  {exp_d1:.0%}  — {strength_label(exp_d1)}",
-        f"  {name_d2:<22s}  {exp_d2:.0%}  — {strength_label(exp_d2)}",
-        f"",
-        f"{'═' * 62}",
-        f"  All measurements use the full {MODEL_DIM}-dimensional meaning",
-        f"  space of {MODEL_NAME}. The 3D map is a simplified view",
-        f"  for visual orientation — rotate and zoom it above.",
-        f"{'═' * 62}",
-    ]
-    report = "\n".join(report_lines)
     return report, fig
-# ── Demo placeholders ─────────────────────────────────────────────────────────
-PLACEHOLDER_A = """\
-The economy is growing rapidly.
-Unemployment is at a record low.
-Businesses are thriving and profits are up.
-Consumer spending is at an all-time high."""
-PLACEHOLDER_B = """\
-Climate change is an existential crisis.
-We must reduce carbon emissions immediately.
-Renewable energy is the only sustainable future.
-The planet is warming at an alarming rate."""
-PLACEHOLDER_D1 = """\
-The stock market reached a new record today.
-Interest rates are being adjusted to control inflation.
-Foreign direct investment increased by 12% this quarter."""
-PLACEHOLDER_D2 = """\
-Arctic ice sheets are melting faster than predicted.
-Scientists warn of irreversible tipping points.
-Carbon capture technology is advancing but not fast enough."""
-# ── Explainer content ─────────────────────────────────────────────────────────
-EXPLAINER_HOW = """
-### How does this tool work?
-Every sentence carries meaning. This tool uses an AI language model to translate
-each sentence into a **point in meaning-space** — an invisible map where sentences
-that mean similar things sit close together, and sentences with very different
-meanings sit far apart.
-You define **two poles** by giving example sentences for each — for instance,
-*economic growth* vs *climate crisis*. These poles create a spectrum.
-Then you enter two sets of text (the "discourses") and the tool measures
-where each one sits on that spectrum. The results tell you:
-- **Which pole each text is closer to** (and by how much)
-- **How spread out** each set of sentences is (focused vs wide-ranging)
-- **What direction** the sentences vary in (along the spectrum, or off to the side)
-The 3D map lets you **see** the results — each dot is a sentence, and you can
-rotate and zoom to explore how they cluster.
-"""
 # ── CSS ───────────────────────────────────────────────────────────────────────
 CSS = """
 body, .gradio-container   { background: #0d0f1c !important; }
@@ -496,133 +431,126 @@ label span                 { color: #8892bb !important;
                              border: none !important;
                              font-weight: 800 !important;
                              font-size: 1.05rem !important;
-                             letter-spacing: 0.03em !important;
                              border-radius: 10px !important; }
 .run-btn:hover             { opacity: 0.86 !important; }
 .output-text textarea      { font-family: 'Courier New', monospace !important;
                              font-size: 0.79rem !important;
                              color: #7dd8f8 !important;
-                             line-height: 1.55 !important; }
 h1, h2, h3, h4             { color: #dde4f8 !important; }
-.gr-accordion              { border: 1px solid #1c2040 !important;
-                             border-radius: 10px !important; }
-.name-box input            { font-weight: 700 !important;
-                             font-size: 0.95rem !important; }
 """
 # ── UI ────────────────────────────────────────────────────────────────────────
 with gr.Blocks(css=CSS, title="Discourse Compass") as demo:
-    # ── Header ────────────────────────────────────────────────────────────
     gr.HTML("""
-    <div style="padding: 8px 0 20px 0;">
-      <h1 style="color:#dde4f8; font-size:2rem; font-weight:900;
-                 margin-bottom:6px; letter-spacing:-0.5px;">
         🧭 Discourse Compass
       </h1>
-      <p style="color:#5a6488; font-size:0.92rem; margin:0; max-width:700px;">
-        Define two semantic poles with example sentences, then find out where
-        any text sits between them — with plain-language explanations.
       </p>
     </div>""")
-    with gr.Accordion("💡  How does this work?  (click to read)", open=False):
-        gr.Markdown(EXPLAINER_HOW)
-    gr.HTML("<hr style='border-color:#1c2040; margin: 8px 0 20px 0;'>")
-    # ── Step 1: Poles ─────────────────────────────────────────────────────
-    gr.HTML("""
-    <h3 style="color:#dde4f8; margin-bottom:4px;">Step 1 — Define your two poles</h3>
-    <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
-      Enter several sentences that represent each extreme. One sentence per line.
-    </p>""")
     with gr.Row():
         with gr.Column():
-            gr.HTML("<span style='color:#5aa8ff;font-weight:700;'>🔵 POLE A</span>")
-            name_a_box = gr.Textbox(label="Name for Pole A",
-                                    value="Economic Growth",
-                                    elem_classes=["name-box"])
-            pole_a = gr.Textbox(label="Sentences — one per line",
-                                lines=7, value=PLACEHOLDER_A)
         with gr.Column():
-            gr.HTML("<span style='color:#ff6b6b;font-weight:700;'>🔴 POLE B</span>")
-            name_b_box = gr.Textbox(label="Name for Pole B",
-                                    value="Climate Crisis",
-                                    elem_classes=["name-box"])
-            pole_b = gr.Textbox(label="Sentences — one per line",
-                                lines=7, value=PLACEHOLDER_B)
-    gr.HTML("<hr style='border-color:#1c2040; margin: 20px 0;'>")
-    # ── Step 2: Discourses ────────────────────────────────────────────────
-    gr.HTML("""
-    <h3 style="color:#dde4f8; margin-bottom:4px;">Step 2 — Enter the texts to analyse</h3>
-    <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
-      These are the texts whose position between the poles you want to measure.
-    </p>""")
     with gr.Row():
         with gr.Column():
-            gr.HTML("<span style='color:#3dd6a3;font-weight:700;'>🟢 TEXT 1</span>")
-            name_d1_box = gr.Textbox(label="Name for Text 1",
-                                     value="Financial News",
-                                     elem_classes=["name-box"])
-            disc1 = gr.Textbox(label="Sentences — one per line",
-                               lines=5, value=PLACEHOLDER_D1)
         with gr.Column():
-            gr.HTML("<span style='color:#ffcc55;font-weight:700;'>🟡 TEXT 2</span>")
-            name_d2_box = gr.Textbox(label="Name for Text 2",
-                                     value="Climate Reporting",
-                                     elem_classes=["name-box"])
-            disc2 = gr.Textbox(label="Sentences — one per line",
-                               lines=5, value=PLACEHOLDER_D2)
-    # ── Run button ────────────────────────────────────────────────────────
-    gr.HTML("<div style='margin: 24px 0 8px 0;'>")
-    run_btn = gr.Button("⚡  Run Analysis", variant="primary",
-                        size="lg", elem_classes=["run-btn"])
-    gr.HTML("</div>")
-    gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")
-    # ── Results ───────────────────────────────────────────────────────────
-    gr.HTML("""
-    <h3 style="color:#dde4f8; margin: 0 0 4px 0;">📊 Interactive Semantic Map</h3>
-    <p style="color:#5a6488; font-size:0.84rem; margin:0 0 12px 0;">
-      Each dot is a sentence. Diamonds (◆) mark the centre of each group.
-      <strong>Drag to rotate · scroll to zoom · click legend items to toggle.</strong>
-    </p>""")
-    plot_out = gr.Plot(label="Semantic Map")
-    gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")
-    gr.HTML("""
-    <h3 style="color:#dde4f8; margin: 0 0 4px 0;">📋 Results Report</h3>
-    <p style="color:#5a6488; font-size:0.84rem; margin:0 0 10px 0;">
-      Plain-language summary of every measurement.
-    </p>""")
-    text_out = gr.Textbox(label="Results", lines=42, interactive=False,
-                          elem_classes=["output-text"])
-    # ── Wire up events ────────────────────────────────────────────────────
     run_btn.click(
         fn=run_analysis,
-        inputs=[pole_a, pole_b, disc1, disc2,
-                name_a_box, name_b_box, name_d1_box, name_d2_box],
-        outputs=[text_out, plot_out],
     )
-    gr.HTML(f"""
-    <p style="color:#1e2440; font-size:0.74rem; text-align:center;
-              margin-top:28px; padding-bottom:12px;">
-      All measurements use the full {MODEL_DIM}-dimensional meaning space of
-      <code>{MODEL_NAME}</code>.
-      The 3D map is a simplified view (PCA) for orientation only.
     </p>""")
 if __name__ == "__main__":

 """
+Discourse Compass — Gradio App
 =============================================================
+Spatial-geometric discourse analysis for corpus analysts.
+Plain-language results focused on position, cluster tightness,
+and comparative interpretation.
 """
 import gradio as gr
 import plotly.graph_objects as go
 from sentence_transformers import SentenceTransformer
 from sklearn.decomposition import PCA
 # ── Model ─────────────────────────────────────────────────────────────────────
 MODEL_NAME = "all-mpnet-base-v2"
 _model = None
 def get_model():
         _model = SentenceTransformer(MODEL_NAME)
     return _model
+# ── Helpers ───────────────────────────────────────────────────────────────────
 def parse_sentences(text):
     return [s.strip() for s in text.strip().splitlines() if s.strip()]
     c = abs(float(np.dot(unit(u), unit(v))))
     return float(np.degrees(np.arccos(min(c, 1.0))))
+def frobenius_spread(vecs):
+    """Total spread of a point cloud (Frobenius norm of centred matrix)."""
     return float(np.linalg.norm(vecs - vecs.mean(axis=0), "fro"))
+def pc1_axis_angle(vecs, axis):
+    """Angle between the first principal component and a given axis vector."""
     if vecs.shape[0] < 2:
+        return 90.0
+    cov = np.cov(vecs, rowvar=False)
+    vals, evecs = np.linalg.eigh(cov)
+    pc1 = evecs[:, np.argmax(vals)]
+    return angle_between(pc1, axis)
+def isotropy(vecs):
+    """λ_min / λ_max — how spherical the point cloud is (0=line, 1=sphere)."""
+    if vecs.shape[0] < 2:
+        return 0.0
+    cov = np.cov(vecs, rowvar=False)
+    vals = np.linalg.eigvalsh(cov)
+    vals = vals[vals > 1e-12]
+    if len(vals) < 2:
+        return 0.0
+    return float(vals.min() / vals.max())
+# ── Pole separation label ─────────────────────────────────────────────────────
+def pole_sep_label(sep):
+    if sep >= 0.5:
+        return "strong", "The axis cleanly separates the two poles — results are reliable."
+    elif sep >= 0.3:
+        return "moderate", "The axis separates the poles reasonably well — results are meaningful."
+    elif sep >= 0.15:
+        return "weak", "The poles are only weakly separated — interpret results with caution."
+    else:
+        return "very weak", "The poles are barely distinguishable — axis may not be valid."
+# ── Position percentage helper ────────────────────────────────────────────────
+def position_pct(score, neg_mean, pos_mean):
+    """Map a score to 0–100% between the two pole centroids."""
+    span = pos_mean - neg_mean
+    if abs(span) < 1e-9:
+        return 50.0
+    return float(np.clip((score - neg_mean) / span * 100, 0, 100))
+# ── Bar renderer ──────────────────────────────────────────────────────────────
+def render_bar(pct, label, width=44):
+    pos = int(round(pct / 100 * width))
+    bar = "░" * pos + "●" + "░" * (width - pos)
+    return f"  {bar}  ({pct:.0f}%)\n  → {label}"
+# ── Spread label ──────────────────────────────────────────────────────────────
+def spread_label(spread, is_pole=False):
+    if is_pole:
+        if spread > 2.0:
+            return "wide-ranging (as expected for a pole corpus)"
+        else:
+            return "fairly focused for a pole corpus"
+    else:
+        if spread < 1.0:
+            return "very tightly focused"
+        elif spread < 1.8:
+            return "tightly focused"
+        elif spread < 2.5:
+            return "moderately varied"
+        else:
+            return "wide-ranging"
+# ── Reliability label from spread ────────────────────────────────────────────
+def reliability_label(spread):
+    if spread < 1.0:
+        return "very reliable — sentences are highly consistent"
+    elif spread < 1.8:
+        return "reliable — sentences cluster closely together"
+    elif spread < 2.5:
+        return "moderately reliable — some internal variation"
+    else:
+        return "less reliable — sentences pull in quite different directions"
+# ── Axis relevance label ──────────────────────────────────────────────────────
+def axis_relevance_label(angle):
+    """How much of the text's variation runs along the pole axis."""
+    if angle < 30:
+        return "high", "sentences mostly differ by being more or less aligned with the poles"
+    elif angle < 60:
+        return "moderate", "sentences differ partly along the pole axis, partly on other dimensions"
+    else:
+        return "low", "sentences differ mainly on dimensions unrelated to this axis"
+# ── Gap interpretation ────────────────────────────────────────────────────────
+def gap_label(gap_pct):
+    if gap_pct >= 40:
+        return "very large — a clear, unmistakeable difference"
+    elif gap_pct >= 25:
+        return "substantial — a meaningful difference"
+    elif gap_pct >= 12:
+        return "moderate — a noticeable but not dramatic difference"
+    elif gap_pct >= 5:
+        return "small — the texts are fairly similar in position"
     else:
+        return "negligible — no clear difference in position"
+# ── Main analysis function ────────────────────────────────────────────────────
+def run_analysis(
+    pole_neg_name, pole_neg_text,
+    pole_pos_name, pole_pos_text,
+    text1_name, text1_text,
+    text2_name, text2_text,
+):
+    # ── Parse inputs ──────────────────────────────────────────────────────
+    pole_neg_sents = parse_sentences(pole_neg_text)
+    pole_pos_sents = parse_sentences(pole_pos_text)
+    text1_sents    = parse_sentences(text1_text)
+    text2_sents    = parse_sentences(text2_text)
+    errors = []
+    if len(pole_neg_sents) < 3:
+        errors.append(f"'{pole_neg_name}' pole needs at least 3 sentences.")
+    if len(pole_pos_sents) < 3:
+        errors.append(f"'{pole_pos_name}' pole needs at least 3 sentences.")
+    if len(text1_sents) < 1:
+        errors.append(f"'{text1_name}' needs at least 1 sentence.")
+    if len(text2_sents) < 1:
+        errors.append(f"'{text2_name}' needs at least 1 sentence.")
+    if errors:
+        return "\n".join(errors), None
+    # ── Embed ─────────────────────────────────────────────────────────────
+    model = get_model()
+    all_sents = pole_neg_sents + pole_pos_sents + text1_sents + text2_sents
+    all_vecs  = model.encode(all_sents, normalize_embeddings=True,
+                              show_progress_bar=False)
+    n_neg  = len(pole_neg_sents)
+    n_pos  = len(pole_pos_sents)
+    n_t1   = len(text1_sents)
+    vecs_neg  = all_vecs[:n_neg]
+    vecs_pos  = all_vecs[n_neg:n_neg+n_pos]
+    vecs_t1   = all_vecs[n_neg+n_pos:n_neg+n_pos+n_t1]
+    vecs_t2   = all_vecs[n_neg+n_pos+n_t1:]
+    # ── Axis construction ─────────────────────────────────────────────────
+    c_neg = vecs_neg.mean(axis=0)
+    c_pos = vecs_pos.mean(axis=0)
+    axis  = unit(c_pos - c_neg)
+    pole_sep = float(np.dot(c_pos, axis) - np.dot(c_neg, axis))
+    sep_word, sep_note = pole_sep_label(pole_sep)
+    # ── Projections ───────────────────────────────────────────────────────
+    proj_neg = float(np.dot(c_neg, axis))
+    proj_pos = float(np.dot(c_pos, axis))
+    proj_t1  = float(np.dot(vecs_t1.mean(axis=0), axis))
+    proj_t2  = float(np.dot(vecs_t2.mean(axis=0), axis))
+    pct_neg = position_pct(proj_neg, proj_neg, proj_pos)   # 0%
+    pct_pos = position_pct(proj_pos, proj_neg, proj_pos)   # 100%
+    pct_t1  = position_pct(proj_t1,  proj_neg, proj_pos)
+    pct_t2  = position_pct(proj_t2,  proj_neg, proj_pos)
+    gap_pct = abs(pct_t1 - pct_t2)
+    # ── Position labels ───────────────────────────────────────────────────
+    def position_desc(pct, pn, pp):
+        if pct <= 15:
+            return f"very close to the {pn} pole"
+        elif pct <= 35:
+            return f"closer to the {pn} pole"
+        elif pct <= 50:
+            return f"slightly closer to the {pn} pole"
+        elif pct <= 65:
+            return f"slightly closer to the {pp} pole"
+        elif pct <= 85:
+            return f"closer to the {pp} pole"
+        else:
+            return f"very close to the {pp} pole"
+    desc_t1 = position_desc(pct_t1, pole_neg_name, pole_pos_name)
+    desc_t2 = position_desc(pct_t2, pole_neg_name, pole_pos_name)
+    # ── Spread ────────────────────────────────────────────────────────────
+    spread_neg = frobenius_spread(vecs_neg)
+    spread_pos = frobenius_spread(vecs_pos)
+    spread_t1  = frobenius_spread(vecs_t1)
+    spread_t2  = frobenius_spread(vecs_t2)
+    rel_t1 = reliability_label(spread_t1)
+    rel_t2 = reliability_label(spread_t2)
+    # ── Axis relevance ────────────────────────────────────────────────────
+    angle_t1 = pc1_axis_angle(vecs_t1, axis)
+    angle_t2 = pc1_axis_angle(vecs_t2, axis)
+    ar_word_t1, ar_desc_t1 = axis_relevance_label(angle_t1)
+    ar_word_t2, ar_desc_t2 = axis_relevance_label(angle_t2)
+    # ── Verdict ───────────────────────────────────────────────────────────
+    gap_desc = gap_label(gap_pct)
+    if gap_pct < 5:
+        verdict = (f"No clear difference: {text1_name} and {text2_name} sit "
+                   f"in very similar positions on the {pole_neg_name}↔{pole_pos_name} spectrum.")
+    else:
+        closer_neg  = text1_name if pct_t1 < pct_t2 else text2_name
+        closer_pos  = text2_name if pct_t1 < pct_t2 else text1_name
+        verdict = (f"{closer_neg} aligns more closely with {pole_neg_name}; "
+                   f"{closer_pos} aligns more closely with {pole_pos_name}. "
+                   f"The gap between them is {gap_desc}.")
+    # Reliability caveat
+    caveats = []
+    if spread_t1 > 2.5:
+        caveats.append(f"{text1_name} is wide-ranging — its position score is an average of quite different sentences.")
+    if spread_t2 > 2.5:
+        caveats.append(f"{text2_name} is wide-ranging — its position score is an average of quite different sentences.")
+    if sep_word in ("weak", "very weak"):
+        caveats.append(f"The axis itself has {sep_word} pole separation — treat all results with caution.")
+    # ── Report ────────────────────────────────────────────────────────────
+    W = 62
+    SEP = "═" * W
+    lines = [
+        SEP,
+        "  DISCOURSE COMPASS — Results",
+        SEP,
+        "",
+        f"  AXIS:  {pole_neg_name}  ←{'─'*20}→  {pole_pos_name}",
+        f"  Pole separation: {sep_word} ({pole_sep:.2f}) — {sep_note}",
+        "",
+        "─" * W,
+        "  WHERE EACH TEXT SITS ON THE SPECTRUM",
+        "─" * W,
+        f"  Reading: 0% = {pole_neg_name} pole  |  100% = {pole_pos_name} pole",
+        "",
+        f"  {pole_neg_name} pole  {'░'*21}●{'░'*21}  (0%)",
+        "",
+        f"  {text1_name}:",
+        render_bar(pct_t1, desc_t1),
+        "",
+        f"  {text2_name}:",
+        render_bar(pct_t2, desc_t2),
+        "",
+        f"  {pole_pos_name} pole  {'░'*21}●{'░'*21}  (100%)",
+        "",
+        f"  Gap between texts: {gap_pct:.0f} percentage points — {gap_desc}.",
+        "",
+        "─" * W,
+        "  HOW CONSISTENTLY DO THE SENTENCES CLUSTER?",
+        "─" * W,
+        "  A tight cluster means all sentences point in the same direction.",
+        "  A loose cluster means they pull in different directions — the",
+        "  position score becomes less reliable as an overall summary.",
+        "",
+        f"  {pole_neg_name} pole    spread = {spread_neg:.2f}  — {spread_label(spread_neg, is_pole=True)}",
+        f"  {pole_pos_name} pole    spread = {spread_pos:.2f}  — {spread_label(spread_pos, is_pole=True)}",
+        f"  {text1_name:<22} spread = {spread_t1:.2f}  — {spread_label(spread_t1)}",
+        f"    Position score is {rel_t1}.",
+        f"  {text2_name:<22} spread = {spread_t2:.2f}  — {spread_label(spread_t2)}",
+        f"    Position score is {rel_t2}.",
+        "",
+        "─" * W,
+        "  HOW AXIS-RELEVANT IS THE VARIATION?",
+        "─" * W,
+        "  This checks whether the sentences within each text differ from",
+        "  each other mainly along the pole axis, or mainly on unrelated",
+        "  dimensions (topic, register, tone, etc.).",
+        "",
+        f"  {text1_name}: axis relevance is {ar_word_t1}",
+        f"    → {ar_desc_t1}.",
+        f"  {text2_name}: axis relevance is {ar_word_t2}",
+        f"    → {ar_desc_t2}.",
+        "",
+    ]
+    if caveats:
+        lines += ["─" * W, "  ⚠  CAVEATS", "─" * W]
+        for c in caveats:
+            lines.append(f"  • {c}")
+        lines.append("")
+    lines += [
+        "─" * W,
+        "  SUMMARY",
+        "─" * W,
+        f"  {verdict}",
+        "",
+    ]
+    if caveats:
+        lines.append("  ⚠  See caveats above before drawing strong conclusions.")
+    else:
+        lines.append("  Results appear reliable. No major caveats.")
+    lines += [
+        "",
+        SEP,
+        "  Measurements use the full 768-dimensional meaning space of",
+        f"  {MODEL_NAME}. The 3D map is a simplified view.",
+        SEP,
+    ]
+    report = "\n".join(lines)
+    # ── 3D Plot ───────────────────────────────────────────────────────────
+    pca = PCA(n_components=3)
+    all_study = np.vstack([vecs_neg, vecs_pos, vecs_t1, vecs_t2])
+    coords = pca.fit_transform(all_study)
+    i0 = 0
+    i1 = n_neg
+    i2 = n_neg + n_pos
+    i3 = n_neg + n_pos + n_t1
+    def make_trace(coords_slice, name, color, symbol, size=6):
+        """Build a markers-only ``go.Scatter3d`` trace for one group of points.
+
+        The first three columns of ``coords_slice`` supply x, y and z. ``name``
+        becomes the trace name; ``color``, ``symbol`` and ``size`` are passed to
+        the marker, which is always drawn with a fixed opacity of 0.82.
+        """
+        x, y, z = coords_slice[:,0], coords_slice[:,1], coords_slice[:,2]
+        return go.Scatter3d(
+            x=x, y=y, z=z,
             mode="markers",
+            name=name,
+            marker=dict(size=size, color=color, symbol=symbol, opacity=0.82),
+        )
+    # Pole centroid markers (larger crosses, labelled with the pole name)
+    c_neg_3d = coords[:n_neg].mean(axis=0)
+    c_pos_3d = coords[n_neg:n_neg+n_pos].mean(axis=0)
+    traces = [
+        make_trace(coords[i0:i1], f"{pole_neg_name} (pole)", "#e05555", "circle"),
+        make_trace(coords[i1:i2], f"{pole_pos_name} (pole)", "#4a9eff", "circle"),
+        make_trace(coords[i2:i3], text1_name, "#f5a623", "diamond", size=8),
+        make_trace(coords[i3:],   text2_name, "#7ed321", "square",  size=8),
+        go.Scatter3d(
+            x=[c_neg_3d[0]], y=[c_neg_3d[1]], z=[c_neg_3d[2]],
             mode="markers+text",
+            name=f"{pole_neg_name} centroid",
+            text=[pole_neg_name],
             textposition="top center",
+            marker=dict(size=12, color="#e05555", symbol="cross"),
             showlegend=False,
+        ),
+        go.Scatter3d(
+            x=[c_pos_3d[0]], y=[c_pos_3d[1]], z=[c_pos_3d[2]],
+            mode="markers+text",
+            name=f"{pole_pos_name} centroid",
+            text=[pole_pos_name],
+            textposition="top center",
+            marker=dict(size=12, color="#4a9eff", symbol="cross"),
             showlegend=False,
+        ),
+    ]
+    fig = go.Figure(data=traces)
     fig.update_layout(
         title=dict(
+            text=f"Sentence Clusters: {pole_neg_name} ↔ {pole_pos_name}",
+            font=dict(color="#dde4f8", size=14),
         ),
+        scene=dict(
+            xaxis=dict(title=f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)",
+                       backgroundcolor="#0d0f1c", gridcolor="#1c2040",
+                       color="#8892bb"),
+            yaxis=dict(title=f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)",
+                       backgroundcolor="#0d0f1c", gridcolor="#1c2040",
+                       color="#8892bb"),
+            zaxis=dict(title=f"PC3 ({pca.explained_variance_ratio_[2]*100:.1f}%)",
+                       backgroundcolor="#0d0f1c", gridcolor="#1c2040",
+                       color="#8892bb"),
+            bgcolor="#0d0f1c",
         ),
+        paper_bgcolor="#0d0f1c",
+        plot_bgcolor="#0d0f1c",
+        font=dict(color="#dde4f8"),
+        legend=dict(bgcolor="#13162a", bordercolor="#1c2040",
+                    font=dict(color="#dde4f8")),
+        margin=dict(l=0, r=0, t=40, b=0),
+        height=520,
     )
     return report, fig
 # ── CSS ───────────────────────────────────────────────────────────────────────
 CSS = """
 body, .gradio-container   { background: #0d0f1c !important; }
                              border: none !important;
                              font-weight: 800 !important;
                              font-size: 1.05rem !important;
                              border-radius: 10px !important; }
 .run-btn:hover             { opacity: 0.86 !important; }
 .output-text textarea      { font-family: 'Courier New', monospace !important;
                              font-size: 0.79rem !important;
                              color: #7dd8f8 !important;
+                             line-height: 1.6 !important; }
 h1, h2, h3, h4             { color: #dde4f8 !important; }
 """
+# Introductory Markdown for the page; rendered with gr.Markdown(INTRO) right
+# after the HTML header inside the Blocks layout.
+INTRO = """
+**Discourse Compass** positions any text on a spectrum between two semantic poles you define.
+Enter example sentences for each pole, then enter the texts you want to compare.
+Each sentence goes on its own line.
+"""
 # ── UI ────────────────────────────────────────────────────────────────────────
 with gr.Blocks(css=CSS, title="Discourse Compass") as demo:
+    # Page layout, top to bottom: HTML header and Markdown intro, a row with
+    # the two pole definitions, a row with the two texts to compare, the run
+    # button, then the 3D plot followed by the plain-text report and a footer.
     gr.HTML("""
+    <div style="padding:8px 0 16px 0;">
+      <h1 style="color:#dde4f8;font-size:2rem;font-weight:900;
+                 margin-bottom:4px;letter-spacing:-0.5px;">
         🧭 Discourse Compass
       </h1>
+      <p style="color:#5a6488;font-size:0.9rem;margin:0;">
+        Position any text on a spectrum between two semantic poles —
+        plain-language results for corpus analysts.
       </p>
     </div>""")
+    gr.Markdown(INTRO)
     with gr.Row():
+        # ── Pole A ────────────────────────────────────────────────────────
         with gr.Column():
+            gr.HTML("<h3 style='color:#e05555;margin-bottom:4px;'>◀ Pole A</h3>")
+            pole_neg_name = gr.Textbox(
+                value="growth critical",
+                label="Name for Pole A",
+                placeholder="e.g. ecocentric, conservative, pro-regulation …",
+            )
+            pole_neg_text = gr.Textbox(
+                label="Example sentences for Pole A (one per line, min. 3)",
+                lines=8,
+                placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
+            )
+        # ── Pole B ────────────────────────────────────────────────────────
         with gr.Column():
+            gr.HTML("<h3 style='color:#4a9eff;margin-bottom:4px;'>▶ Pole B</h3>")
+            pole_pos_name = gr.Textbox(
+                value="growth favoured",
+                label="Name for Pole B",
+                placeholder="e.g. anthropocentric, progressive, pro-market …",
+            )
+            pole_pos_text = gr.Textbox(
+                label="Example sentences for Pole B (one per line, min. 3)",
+                lines=8,
+                placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
+            )
+    gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")
     with gr.Row():
+        # ── Text 1 ────────────────────────────────────────────────────────
         with gr.Column():
+            gr.HTML("<h3 style='color:#f5a623;margin-bottom:4px;'>◆ Text A</h3>")
+            text1_name = gr.Textbox(
+                value="Text A",
+                label="Name for Text A",
+                placeholder="e.g. Financial News, Corpus 1, Policy Document …",
+            )
+            text1_text = gr.Textbox(
+                label="Sentences from Text A (one per line)",
+                lines=6,
+                placeholder="Paste sentences here.\nOne sentence per line.",
+            )
+        # ── Text 2 ────────────────────────────────────────────────────────
         with gr.Column():
+            gr.HTML("<h3 style='color:#7ed321;margin-bottom:4px;'>◆ Text B</h3>")
+            text2_name = gr.Textbox(
+                value="Text B",
+                label="Name for Text B",
+                placeholder="e.g. Climate Reporting, Corpus 2, Interview Data …",
+            )
+            text2_text = gr.Textbox(
+                label="Sentences from Text B (one per line)",
+                lines=6,
+                placeholder="Paste sentences here.\nOne sentence per line.",
+            )
+    # "run-btn" and "output-text" (below) are the class names that the CSS
+    # stylesheet's .run-btn and .output-text rules refer to.
+    run_btn = gr.Button("▶  Run Analysis", elem_classes=["run-btn"])
+    gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")
+    plot_out  = gr.Plot(label="3D Sentence Map (rotate & zoom)")
+    report_out = gr.Textbox(
+        label="Results",
+        lines=40,
+        interactive=False,
+        elem_classes=["output-text"],
+    )
+    # The plot is laid out above the report, but outputs are listed
+    # report-first to match the (report, figure) tuple run_analysis returns.
+    # NOTE(review): inputs are bound positionally — confirm their order against
+    # run_analysis's full signature (its first parameters are not visible here).
     run_btn.click(
         fn=run_analysis,
+        inputs=[
+            pole_neg_name, pole_neg_text,
+            pole_pos_name, pole_pos_text,
+            text1_name, text1_text,
+            text2_name, text2_text,
+        ],
+        outputs=[report_out, plot_out],
     )
+    gr.HTML("""
+    <p style="color:#2a2e4a;font-size:0.73rem;text-align:center;
+              margin-top:20px;padding-bottom:10px;">
+      Embeddings: all-mpnet-base-v2 (768-dim) · H4rmony Project
     </p>""")
 if __name__ == "__main__":