neovalle committed on
Commit
91e0e03
·
verified ·
1 Parent(s): 4589a2b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +609 -438
app.py CHANGED
@@ -1,9 +1,10 @@
1
  """
2
- Discourse Compass β€” Gradio App
3
  =============================================================
4
- Spatial-geometric discourse analysis for corpus analysts.
5
- Plain-language results focused on position, cluster tightness,
6
- and comparative interpretation.
 
7
  """
8
 
9
  import gradio as gr
@@ -11,9 +12,11 @@ import numpy as np
11
  import plotly.graph_objects as go
12
  from sentence_transformers import SentenceTransformer
13
  from sklearn.decomposition import PCA
 
14
 
15
  # ── Model ─────────────────────────────────────────────────────────────────────
16
  MODEL_NAME = "all-mpnet-base-v2"
 
17
  _model = None
18
 
19
  def get_model():
@@ -22,7 +25,7 @@ def get_model():
22
  _model = SentenceTransformer(MODEL_NAME)
23
  return _model
24
 
25
- # ── Helpers ───────────────────────────────────────────────────────────────────
26
  def parse_sentences(text):
27
  return [s.strip() for s in text.strip().splitlines() if s.strip()]
28
 
@@ -34,385 +37,546 @@ def angle_between(u, v):
34
  c = abs(float(np.dot(unit(u), unit(v))))
35
  return float(np.degrees(np.arccos(min(c, 1.0))))
36
 
37
- def frobenius_spread(vecs):
38
- """Total spread of a point cloud (Frobenius norm of centred matrix)."""
39
  return float(np.linalg.norm(vecs - vecs.mean(axis=0), "fro"))
40
 
41
- def pc1_axis_angle(vecs, axis):
42
- """Angle between the first principal component and a given axis vector."""
43
  if vecs.shape[0] < 2:
44
- return 90.0
45
- cov = np.cov(vecs, rowvar=False)
46
- vals, evecs = np.linalg.eigh(cov)
47
- pc1 = evecs[:, np.argmax(vals)]
48
- return angle_between(pc1, axis)
49
-
50
- def isotropy(vecs):
51
- """Ξ»_min / Ξ»_max β€” how spherical the point cloud is (0=line, 1=sphere)."""
52
- if vecs.shape[0] < 2:
53
- return 0.0
54
- cov = np.cov(vecs, rowvar=False)
55
- vals = np.linalg.eigvalsh(cov)
56
- vals = vals[vals > 1e-12]
57
- if len(vals) < 2:
58
- return 0.0
59
- return float(vals.min() / vals.max())
60
-
61
- # ── Pole separation label ─────────────────────────────────────────────────────
62
- def pole_sep_label(sep):
63
- if sep >= 0.5:
64
- return "strong", "The axis cleanly separates the two poles β€” results are reliable."
65
- elif sep >= 0.3:
66
- return "moderate", "The axis separates the poles reasonably well β€” results are meaningful."
67
- elif sep >= 0.15:
68
- return "weak", "The poles are only weakly separated β€” interpret results with caution."
69
- else:
70
- return "very weak", "The poles are barely distinguishable β€” axis may not be valid."
71
-
72
- # ── Position percentage helper ────────────────────────────────────────────────
73
- def position_pct(score, neg_mean, pos_mean):
74
- """Map a score to 0–100% between the two pole centroids."""
75
- span = pos_mean - neg_mean
76
- if abs(span) < 1e-9:
77
- return 50.0
78
- return float(np.clip((score - neg_mean) / span * 100, 0, 100))
79
-
80
- # ── Bar renderer ──────────────────────────────────────────────────────────────
81
- def render_bar(pct, label, width=44):
82
- pos = int(round(pct / 100 * width))
83
- bar = "β–‘" * pos + "●" + "β–‘" * (width - pos)
84
- return f" {bar} ({pct:.0f}%)\n β†’ {label}"
85
-
86
- # ── Spread label ──────────────────────────────────────────────────────────────
87
- def spread_label(spread, is_pole=False):
88
- if is_pole:
89
- if spread > 2.0:
90
- return "wide-ranging (as expected for a pole corpus)"
91
- else:
92
- return "fairly focused for a pole corpus"
93
- else:
94
- if spread < 1.0:
95
- return "very tightly focused"
96
- elif spread < 1.8:
97
- return "tightly focused"
98
- elif spread < 2.5:
99
- return "moderately varied"
100
- else:
101
- return "wide-ranging"
102
-
103
- # ── Reliability label from spread ──────────────────────��─────────────────────
104
- def reliability_label(spread):
105
- if spread < 1.0:
106
- return "very reliable β€” sentences are highly consistent"
107
- elif spread < 1.8:
108
- return "reliable β€” sentences cluster closely together"
109
- elif spread < 2.5:
110
- return "moderately reliable β€” some internal variation"
111
- else:
112
- return "less reliable β€” sentences pull in quite different directions"
113
-
114
- # ── Axis relevance label ──────────────────────────────────────────────────────
115
- def axis_relevance_label(angle):
116
- """How much of the text's variation runs along the pole axis."""
117
- if angle < 30:
118
- return "high", "sentences mostly differ by being more or less aligned with the poles"
119
- elif angle < 60:
120
- return "moderate", "sentences differ partly along the pole axis, partly on other dimensions"
121
  else:
122
- return "low", "sentences differ mainly on dimensions unrelated to this axis"
123
-
124
- # ── Gap interpretation ────────────────────────────────────────────────────────
125
- def gap_label(gap_pct):
126
- if gap_pct >= 40:
127
- return "very large β€” a clear, unmistakeable difference"
128
- elif gap_pct >= 25:
129
- return "substantial β€” a meaningful difference"
130
- elif gap_pct >= 12:
131
- return "moderate β€” a noticeable but not dramatic difference"
132
- elif gap_pct >= 5:
133
- return "small β€” the texts are fairly similar in position"
134
- else:
135
- return "negligible β€” no clear difference in position"
136
-
137
- # ── Main analysis function ────────────────────────────────────────────────────
138
- def run_analysis(
139
- pole_neg_name, pole_neg_text,
140
- pole_pos_name, pole_pos_text,
141
- text1_name, text1_text,
142
- text2_name, text2_text,
 
 
143
  ):
144
- # ── Parse inputs ──────────────────────────────────────────────────────
145
- pole_neg_sents = parse_sentences(pole_neg_text)
146
- pole_pos_sents = parse_sentences(pole_pos_text)
147
- text1_sents = parse_sentences(text1_text)
148
- text2_sents = parse_sentences(text2_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  errors = []
151
- if len(pole_neg_sents) < 3:
152
- errors.append(f"'{pole_neg_name}' pole needs at least 3 sentences.")
153
- if len(pole_pos_sents) < 3:
154
- errors.append(f"'{pole_pos_name}' pole needs at least 3 sentences.")
155
- if len(text1_sents) < 1:
156
- errors.append(f"'{text1_name}' needs at least 1 sentence.")
157
- if len(text2_sents) < 1:
158
- errors.append(f"'{text2_name}' needs at least 1 sentence.")
159
  if errors:
160
- return "\n".join(errors), None
161
 
162
- # ── Embed ─────────────────────────────────────────────────────────────
163
  model = get_model()
164
- all_sents = pole_neg_sents + pole_pos_sents + text1_sents + text2_sents
165
- all_vecs = model.encode(all_sents, normalize_embeddings=True,
166
- show_progress_bar=False)
167
-
168
- n_neg = len(pole_neg_sents)
169
- n_pos = len(pole_pos_sents)
170
- n_t1 = len(text1_sents)
171
-
172
- vecs_neg = all_vecs[:n_neg]
173
- vecs_pos = all_vecs[n_neg:n_neg+n_pos]
174
- vecs_t1 = all_vecs[n_neg+n_pos:n_neg+n_pos+n_t1]
175
- vecs_t2 = all_vecs[n_neg+n_pos+n_t1:]
176
-
177
- # ── Axis construction ─────────────────────────────────────────────────
178
- c_neg = vecs_neg.mean(axis=0)
179
- c_pos = vecs_pos.mean(axis=0)
180
- axis = unit(c_pos - c_neg)
181
-
182
- pole_sep = float(np.dot(c_pos, axis) - np.dot(c_neg, axis))
183
- sep_word, sep_note = pole_sep_label(pole_sep)
184
-
185
- # ── Projections ───────────────────────────────────────────────────────
186
- proj_neg = float(np.dot(c_neg, axis))
187
- proj_pos = float(np.dot(c_pos, axis))
188
- proj_t1 = float(np.dot(vecs_t1.mean(axis=0), axis))
189
- proj_t2 = float(np.dot(vecs_t2.mean(axis=0), axis))
190
-
191
- pct_neg = position_pct(proj_neg, proj_neg, proj_pos) # 0%
192
- pct_pos = position_pct(proj_pos, proj_neg, proj_pos) # 100%
193
- pct_t1 = position_pct(proj_t1, proj_neg, proj_pos)
194
- pct_t2 = position_pct(proj_t2, proj_neg, proj_pos)
195
-
196
- gap_pct = abs(pct_t1 - pct_t2)
197
-
198
- # ── Position labels ───────────────────────────────────────────────────
199
- def position_desc(pct, pn, pp):
200
- if pct <= 15:
201
- return f"very close to the {pn} pole"
202
- elif pct <= 35:
203
- return f"closer to the {pn} pole"
204
- elif pct <= 50:
205
- return f"slightly closer to the {pn} pole"
206
- elif pct <= 65:
207
- return f"slightly closer to the {pp} pole"
208
- elif pct <= 85:
209
- return f"closer to the {pp} pole"
210
- else:
211
- return f"very close to the {pp} pole"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
- desc_t1 = position_desc(pct_t1, pole_neg_name, pole_pos_name)
214
- desc_t2 = position_desc(pct_t2, pole_neg_name, pole_pos_name)
215
 
216
- # ── Spread ────────────────────────────────────────────────────────────
217
- spread_neg = frobenius_spread(vecs_neg)
218
- spread_pos = frobenius_spread(vecs_pos)
219
- spread_t1 = frobenius_spread(vecs_t1)
220
- spread_t2 = frobenius_spread(vecs_t2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
- rel_t1 = reliability_label(spread_t1)
223
- rel_t2 = reliability_label(spread_t2)
224
 
225
- # ── Axis relevance ────────────────────────────────────────────────────
226
- angle_t1 = pc1_axis_angle(vecs_t1, axis)
227
- angle_t2 = pc1_axis_angle(vecs_t2, axis)
228
- ar_word_t1, ar_desc_t1 = axis_relevance_label(angle_t1)
229
- ar_word_t2, ar_desc_t2 = axis_relevance_label(angle_t2)
 
 
 
230
 
231
- # ── Verdict ───────────────────────────────────────────────────────────
232
- gap_desc = gap_label(gap_pct)
233
 
234
- if gap_pct < 5:
235
- verdict = (f"No clear difference: {text1_name} and {text2_name} sit "
236
- f"in very similar positions on the {pole_neg_name}↔{pole_pos_name} spectrum.")
 
 
 
237
  else:
238
- closer_neg = text1_name if pct_t1 < pct_t2 else text2_name
239
- closer_pos = text2_name if pct_t1 < pct_t2 else text1_name
240
- verdict = (f"{closer_neg} aligns more closely with {pole_neg_name}; "
241
- f"{closer_pos} aligns more closely with {pole_pos_name}. "
242
- f"The gap between them is {gap_desc}.")
243
 
244
- # Reliability caveat
245
  caveats = []
246
- if spread_t1 > 2.5:
247
- caveats.append(f"{text1_name} is wide-ranging β€” its position score is an average of quite different sentences.")
248
- if spread_t2 > 2.5:
249
- caveats.append(f"{text2_name} is wide-ranging β€” its position score is an average of quite different sentences.")
250
- if sep_word in ("weak", "very weak"):
251
- caveats.append(f"The axis itself has {sep_word} pole separation β€” treat all results with caution.")
252
-
253
- # ── Report ────────────────────────────────────────────────────────────
 
 
254
  W = 62
255
- SEP = "═" * W
256
-
257
- lines = [
258
- SEP,
259
- " DISCOURSE COMPASS β€” Results",
260
- SEP,
261
- "",
262
- f" AXIS: {pole_neg_name} ←{'─'*20}β†’ {pole_pos_name}",
263
- f" Pole separation: {sep_word} ({pole_sep:.2f}) β€” {sep_note}",
264
- "",
265
- "─" * W,
266
- " WHERE EACH TEXT SITS ON THE SPECTRUM",
267
- "─" * W,
268
- f" Reading: 0% = {pole_neg_name} pole | 100% = {pole_pos_name} pole",
269
- "",
270
- f" {pole_neg_name} pole {'β–‘'*21}●{'β–‘'*21} (0%)",
271
- "",
272
- f" {text1_name}:",
273
- render_bar(pct_t1, desc_t1),
274
- "",
275
- f" {text2_name}:",
276
- render_bar(pct_t2, desc_t2),
277
- "",
278
- f" {pole_pos_name} pole {'β–‘'*21}●{'β–‘'*21} (100%)",
279
- "",
280
- f" Gap between texts: {gap_pct:.0f} percentage points β€” {gap_desc}.",
281
- "",
282
- "─" * W,
283
- " HOW CONSISTENTLY DO THE SENTENCES CLUSTER?",
284
- "─" * W,
285
- " A tight cluster means all sentences point in the same direction.",
286
- " A loose cluster means they pull in different directions β€” the",
287
- " position score becomes less reliable as an overall summary.",
288
- "",
289
- f" {pole_neg_name} pole spread = {spread_neg:.2f} β€” {spread_label(spread_neg, is_pole=True)}",
290
- f" {pole_pos_name} pole spread = {spread_pos:.2f} β€” {spread_label(spread_pos, is_pole=True)}",
291
- f" {text1_name:<22} spread = {spread_t1:.2f} β€” {spread_label(spread_t1)}",
292
- f" Position score is {rel_t1}.",
293
- f" {text2_name:<22} spread = {spread_t2:.2f} β€” {spread_label(spread_t2)}",
294
- f" Position score is {rel_t2}.",
295
- "",
296
- "─" * W,
297
- " HOW AXIS-RELEVANT IS THE VARIATION?",
298
- "─" * W,
299
- " This checks whether the sentences within each text differ from",
300
- " each other mainly along the pole axis, or mainly on unrelated",
301
- " dimensions (topic, register, tone, etc.).",
302
- "",
303
- f" {text1_name}: axis relevance is {ar_word_t1}",
304
- f" β†’ {ar_desc_t1}.",
305
- f" {text2_name}: axis relevance is {ar_word_t2}",
306
- f" β†’ {ar_desc_t2}.",
307
- "",
 
 
308
  ]
309
 
310
  if caveats:
311
- lines += ["─" * W, " ⚠ CAVEATS", "─" * W]
 
 
 
 
312
  for c in caveats:
313
- lines.append(f" β€’ {c}")
314
- lines.append("")
315
 
316
- lines += [
317
- "─" * W,
318
- " SUMMARY",
319
- "─" * W,
320
  f" {verdict}",
321
- "",
 
 
 
 
 
322
  ]
 
323
 
324
- if caveats:
325
- lines.append(" ⚠ See caveats above before drawing strong conclusions.")
326
- else:
327
- lines.append(" Results appear reliable. No major caveats.")
328
-
329
- lines += [
330
- "",
331
- SEP,
332
- " Measurements use the full 768-dimensional meaning space of",
333
- f" {MODEL_NAME}. The 3D map is a simplified view.",
334
- SEP,
335
- ]
336
 
337
- report = "\n".join(lines)
338
 
339
- # ── 3D Plot ───────────────────────────────────────────────────────────
340
- pca = PCA(n_components=3)
341
- all_study = np.vstack([vecs_neg, vecs_pos, vecs_t1, vecs_t2])
342
- coords = pca.fit_transform(all_study)
 
 
343
 
344
- i0 = 0
345
- i1 = n_neg
346
- i2 = n_neg + n_pos
347
- i3 = n_neg + n_pos + n_t1
 
348
 
349
- def make_trace(coords_slice, name, color, symbol, size=6):
350
- x, y, z = coords_slice[:,0], coords_slice[:,1], coords_slice[:,2]
351
- return go.Scatter3d(
352
- x=x, y=y, z=z,
353
- mode="markers",
354
- name=name,
355
- marker=dict(size=size, color=color, symbol=symbol, opacity=0.82),
356
- )
357
-
358
- # Pole centroid markers (larger stars)
359
- c_neg_3d = coords[:n_neg].mean(axis=0)
360
- c_pos_3d = coords[n_neg:n_neg+n_pos].mean(axis=0)
361
-
362
- traces = [
363
- make_trace(coords[i0:i1], f"{pole_neg_name} (pole)", "#e05555", "circle"),
364
- make_trace(coords[i1:i2], f"{pole_pos_name} (pole)", "#4a9eff", "circle"),
365
- make_trace(coords[i2:i3], text1_name, "#f5a623", "diamond", size=8),
366
- make_trace(coords[i3:], text2_name, "#7ed321", "square", size=8),
367
- go.Scatter3d(
368
- x=[c_neg_3d[0]], y=[c_neg_3d[1]], z=[c_neg_3d[2]],
369
- mode="markers+text",
370
- name=f"{pole_neg_name} centroid",
371
- text=[pole_neg_name],
372
- textposition="top center",
373
- marker=dict(size=12, color="#e05555", symbol="cross"),
374
- showlegend=False,
375
- ),
376
- go.Scatter3d(
377
- x=[c_pos_3d[0]], y=[c_pos_3d[1]], z=[c_pos_3d[2]],
378
- mode="markers+text",
379
- name=f"{pole_pos_name} centroid",
380
- text=[pole_pos_name],
381
- textposition="top center",
382
- marker=dict(size=12, color="#4a9eff", symbol="cross"),
383
- showlegend=False,
384
- ),
385
- ]
386
 
387
- fig = go.Figure(data=traces)
388
- fig.update_layout(
389
- title=dict(
390
- text=f"Sentence Clusters: {pole_neg_name} ↔ {pole_pos_name}",
391
- font=dict(color="#dde4f8", size=14),
392
- ),
393
- scene=dict(
394
- xaxis=dict(title=f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)",
395
- backgroundcolor="#0d0f1c", gridcolor="#1c2040",
396
- color="#8892bb"),
397
- yaxis=dict(title=f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)",
398
- backgroundcolor="#0d0f1c", gridcolor="#1c2040",
399
- color="#8892bb"),
400
- zaxis=dict(title=f"PC3 ({pca.explained_variance_ratio_[2]*100:.1f}%)",
401
- backgroundcolor="#0d0f1c", gridcolor="#1c2040",
402
- color="#8892bb"),
403
- bgcolor="#0d0f1c",
404
- ),
405
- paper_bgcolor="#0d0f1c",
406
- plot_bgcolor="#0d0f1c",
407
- font=dict(color="#dde4f8"),
408
- legend=dict(bgcolor="#13162a", bordercolor="#1c2040",
409
- font=dict(color="#dde4f8")),
410
- margin=dict(l=0, r=0, t=40, b=0),
411
- height=520,
412
- )
413
 
414
- return report, fig
 
 
415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
 
417
  # ── CSS ───────────────────────────────────────────────────────────────────────
418
  CSS = """
@@ -431,126 +595,133 @@ label span { color: #8892bb !important;
431
  border: none !important;
432
  font-weight: 800 !important;
433
  font-size: 1.05rem !important;
 
434
  border-radius: 10px !important; }
435
  .run-btn:hover { opacity: 0.86 !important; }
436
  .output-text textarea { font-family: 'Courier New', monospace !important;
437
  font-size: 0.79rem !important;
438
  color: #7dd8f8 !important;
439
- line-height: 1.6 !important; }
440
  h1, h2, h3, h4 { color: #dde4f8 !important; }
 
 
 
 
441
  """
442
 
443
- INTRO = """
444
- **Discourse Compass** positions any text on a spectrum between two semantic poles you define.
445
-
446
- Enter example sentences for each pole, then enter the texts you want to compare.
447
- Each sentence goes on its own line.
448
- """
449
 
450
  # ── UI ────────────────────────────────────────────────────────────────────────
451
  with gr.Blocks(title="Discourse Compass") as demo:
452
 
 
453
  gr.HTML("""
454
- <div style="padding:8px 0 16px 0;">
455
- <h1 style="color:#dde4f8;font-size:2rem;font-weight:900;
456
- margin-bottom:4px;letter-spacing:-0.5px;">
457
  🧭 Discourse Compass
458
  </h1>
459
- <p style="color:#5a6488;font-size:0.9rem;margin:0;">
460
- Position any text on a spectrum between two semantic poles β€”
461
- plain-language results for corpus analysts.
462
  </p>
463
  </div>""")
464
 
465
- gr.Markdown(INTRO)
 
 
 
 
 
 
 
 
 
 
466
 
467
  with gr.Row():
468
- # ── Pole A ────────────────────────────────────────────────────────
469
  with gr.Column():
470
- gr.HTML("<h3 style='color:#e05555;margin-bottom:4px;'>β—€ Pole A</h3>")
471
- pole_neg_name = gr.Textbox(
472
- value="growth critical",
473
- label="Name for Pole A",
474
- placeholder="e.g. ecocentric, conservative, pro-regulation …",
475
- )
476
- pole_neg_text = gr.Textbox(
477
- label="Example sentences for Pole A (one per line, min. 3)",
478
- lines=8,
479
- placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
480
- )
481
-
482
- # ── Pole B ────────────────────────────────────────────────────────
483
  with gr.Column():
484
- gr.HTML("<h3 style='color:#4a9eff;margin-bottom:4px;'>β–Ά Pole B</h3>")
485
- pole_pos_name = gr.Textbox(
486
- value="growth favoured",
487
- label="Name for Pole B",
488
- placeholder="e.g. anthropocentric, progressive, pro-market …",
489
- )
490
- pole_pos_text = gr.Textbox(
491
- label="Example sentences for Pole B (one per line, min. 3)",
492
- lines=8,
493
- placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
494
- )
495
-
496
- gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")
 
 
497
 
498
  with gr.Row():
499
- # ── Text 1 ────────────────────────────────────────────────────────
500
  with gr.Column():
501
- gr.HTML("<h3 style='color:#f5a623;margin-bottom:4px;'>β—† Text A</h3>")
502
- text1_name = gr.Textbox(
503
- value="Text A",
504
- label="Name for Text A",
505
- placeholder="e.g. Financial News, Corpus 1, Policy Document …",
506
- )
507
- text1_text = gr.Textbox(
508
- label="Sentences from Text A (one per line)",
509
- lines=6,
510
- placeholder="Paste sentences here.\nOne sentence per line.",
511
- )
512
-
513
- # ── Text 2 ────────────────────────────────────────────────────────
514
  with gr.Column():
515
- gr.HTML("<h3 style='color:#7ed321;margin-bottom:4px;'>β—† Text B</h3>")
516
- text2_name = gr.Textbox(
517
- value="Text B",
518
- label="Name for Text B",
519
- placeholder="e.g. Climate Reporting, Corpus 2, Interview Data …",
520
- )
521
- text2_text = gr.Textbox(
522
- label="Sentences from Text B (one per line)",
523
- lines=6,
524
- placeholder="Paste sentences here.\nOne sentence per line.",
525
- )
526
-
527
- run_btn = gr.Button("β–Ά Run Analysis", elem_classes=["run-btn"])
528
-
529
- gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")
530
-
531
- plot_out = gr.Plot(label="3D Sentence Map (rotate & zoom)")
532
- report_out = gr.Textbox(
533
- label="Results",
534
- lines=40,
535
- interactive=False,
536
- elem_classes=["output-text"],
537
- )
 
 
 
538
 
 
 
 
 
 
 
 
 
 
 
539
  run_btn.click(
540
  fn=run_analysis,
541
- inputs=[
542
- pole_neg_name, pole_neg_text,
543
- pole_pos_name, pole_pos_text,
544
- text1_name, text1_text,
545
- text2_name, text2_text,
546
- ],
547
- outputs=[report_out, plot_out],
548
  )
549
 
550
- gr.HTML("""
551
- <p style="color:#2a2e4a;font-size:0.73rem;text-align:center;
552
- margin-top:20px;padding-bottom:10px;">
553
- Embeddings: all-mpnet-base-v2 (768-dim) Β· H4rmony Project
 
 
554
  </p>""")
555
 
556
  if __name__ == "__main__":
 
1
  """
2
+ Discourse Compass — Gradio App for Linguists & General Public
3
  =============================================================
4
+ • Interactive 3D Plotly scatter (rotate, zoom, pan)
5
+ • Custom naming for poles and discourses
6
+ • Plain-language results for non-technical users
7
+ • Sentence embeddings via all-mpnet-base-v2 (768-dim)
8
  """
9
 
10
  import gradio as gr
 
12
  import plotly.graph_objects as go
13
  from sentence_transformers import SentenceTransformer
14
  from sklearn.decomposition import PCA
15
+ from scipy.spatial.distance import cosine, euclidean
16
 
17
  # ── Model ─────────────────────────────────────────────────────────────────────
18
  # Name of the sentence-embedding model handed to SentenceTransformer.
  MODEL_NAME = "all-mpnet-base-v2"
  # Embedding width; the module docstring describes this model as 768-dim.
  MODEL_DIM = 768
  # Module-level slot for the SentenceTransformer instance that get_model()
  # assigns and returns.
  _model = None
21
 
22
  def get_model():
 
25
  _model = SentenceTransformer(MODEL_NAME)
26
  return _model
27
 
28
+ # ── Maths helpers ─────────────────────────────────────────────────────────────
29
  def parse_sentences(text):
30
  return [s.strip() for s in text.strip().splitlines() if s.strip()]
31
 
 
37
  c = abs(float(np.dot(unit(u), unit(v))))
38
  return float(np.degrees(np.arccos(min(c, 1.0))))
39
 
40
def thematic_breadth(vecs):
    """Return the spread of a set of row vectors around their mean.

    The rows are centred on their column-wise mean and the Frobenius norm of
    the centred matrix is returned as a plain Python float.
    """
    centred = vecs - vecs.mean(axis=0)
    return float(np.linalg.norm(centred, ord="fro"))
42
 
43
def principal_axis(vecs):
    """Eigen-decompose the covariance of the rows of ``vecs``.

    Returns ``(eigenvalues, eigenvectors)`` sorted by descending eigenvalue,
    with eigenvectors stored as columns. When fewer than two rows are given
    no covariance can be estimated, so all-zero eigenvalues and an identity
    matrix of matching width are returned instead.
    """
    if vecs.shape[0] >= 2:
        covariance = np.cov(vecs, rowvar=False)
        eigvals, eigvecs = np.linalg.eigh(covariance)
        # eigh reports eigenvalues in ascending order; flip to largest-first.
        descending = np.argsort(eigvals)[::-1]
        return eigvals[descending], eigvecs[:, descending]

    width = vecs.shape[1]
    return np.zeros(width), np.eye(width)
49
+
50
def semantic_heart(vecs):
    """Return the centroid (column-wise mean) of the rows of ``vecs``."""
    centroid = vecs.mean(axis=0)
    return centroid
52
+
53
+ # ── Plain-language interpretation helpers ─────────────────────────────────────
54
def breadth_label(score, all_scores):
    """Label a breadth score relative to the range of ``all_scores``.

    The score is rescaled to 0..1 between the smallest and largest value in
    ``all_scores``: below 0.33 is "tightly focused", below 0.66 is
    "moderately varied", otherwise "wide-ranging". If every score is equal
    there is no range to compare against and "moderate" is returned.
    """
    lowest = min(all_scores)
    highest = max(all_scores)
    if highest == lowest:
        return "moderate"

    relative = (score - lowest) / (highest - lowest)
    bands = (
        (0.33, "tightly focused"),
        (0.66, "moderately varied"),
    )
    for upper_bound, label in bands:
        if relative < upper_bound:
            return label
    return "wide-ranging"
64
+
65
def orientation_label(angle):
    """Turn an angle in degrees into a phrase about the pole-to-pole spectrum.

    Smaller angles mean closer alignment: under 20 "closely tracks", under 45
    "partly follows", under 70 "drifts away from", and anything else "varies
    independently of" the spectrum.
    """
    bands = (
        (20, "closely tracks the pole-to-pole spectrum"),
        (45, "partly follows the pole-to-pole spectrum"),
        (70, "drifts away from the pole-to-pole spectrum"),
    )
    for upper_bound, phrase in bands:
        if angle < upper_bound:
            return phrase
    return "varies independently of the pole-to-pole spectrum"
73
+
74
def strength_label(pct):
    """Describe how consistent a set of sentences is, given a 0..1 fraction.

    Args:
        pct: A fraction compared against 0.6 and 0.35.
            NOTE(review): presumably the share of variance along the main
            direction of a sentence cloud — confirm against the caller.

    Returns:
        A plain-language phrase: "very consistent …" above 0.6,
        "moderately consistent" above 0.35, otherwise "diverse …".
    """
    # The separators below are real em dashes (U+2014); the earlier text
    # carried a mis-encoded byte sequence that rendered as "β€”" to users.
    if pct > 0.6:
        return "very consistent — sentences cluster in one direction"
    if pct > 0.35:
        return "moderately consistent"
    return "diverse — sentences spread in many directions"
80
+
81
def pull_label(cos_a, cos_b, name_a, name_b):
    """Say which of two named poles a text sits closer to.

    ``cos_a`` and ``cos_b`` are the text's scores against each pole; the pole
    with the *smaller* score is treated as the closer one. The size of the
    gap between the two scores picks the wording: under 0.05 is "roughly
    halfway", under 0.15 "leans toward", otherwise "clearly closer to".
    """
    gap = abs(cos_a - cos_b)
    if cos_a < cos_b:
        nearer = name_a
    else:
        nearer = name_b

    if diff_is_small := gap < 0.05:
        return f"sits roughly halfway between {name_a} and {name_b}"
    if gap < 0.15:
        return f"leans toward {nearer}"
    return f"clearly closer to {nearer}"
90
+
91
+
92
+ # ── Plotly colour palette ─────────────────────────────────────────────────────
93
# Trace colour per corpus, keyed by the short codes build_plotly_figure uses:
# "A" / "B" for the two poles, "D1" / "D2" for the two discourses.
COLORS = {
    "A": "#5aa8ff",
    "B": "#ff6b6b",
    "D1": "#3dd6a3",
    "D2": "#ffcc55",
}

# Dark-theme colours used in the figure layout: paper/plot/axis backgrounds,
# grid lines and legend border, and axis/legend/title text respectively.
BG_COLOR = "#0d0f1c"
GRID_COLOR = "#1c2040"
TEXT_COLOR = "#cdd5f0"
103
+
104
+
105
+ # ── Interactive Plotly 3D renderer ────────────────────────────────────────────
106
def build_plotly_figure(
    pts_a, pts_b, pts_d1, pts_d2,
    c_a, c_b, c_d1, c_d2,
    ev_a, ev_b, ev_d1, ev_d2,
    pca_ev,
    name_a, name_b, name_d1, name_d2,
):
    """Assemble the interactive 3D scatter for two poles and two discourses.

    Traces are added in a fixed order: sentence dots, centroid diamonds, the
    dashed pole-to-pole line, dotted spokes from each discourse centre to
    each pole centre, and finally one "direction of variation" bar per group.

    Args:
        pts_a, pts_b, pts_d1, pts_d2: Per-group point arrays; columns 0, 1
            and 2 are used as x, y and z.
        c_a, c_b, c_d1, c_d2: Per-group centre points, indexed 0..2.
        ev_a, ev_b, ev_d1, ev_d2: Per-group direction vectors; each is scaled
            and added to / subtracted from its centre, so they must support
            element-wise arithmetic with the centres (assumes NumPy arrays
            of length 3 — TODO confirm against caller).
        pca_ev: Three fractions shown as percentages in the axis titles and
            summed for the subtitle.
        name_a, name_b, name_d1, name_d2: Display names used in legend
            entries, hover text and the title.

    Returns:
        The populated ``go.Figure``.
    """
    fig = go.Figure()

    # ── Sentence dots ─────────────────────────────────────────────────────
    for pts, key, name, symbol in [
        (pts_a, "A", name_a, "circle"),
        (pts_b, "B", name_b, "circle"),
        (pts_d1, "D1", name_d1, "square"),
        (pts_d2, "D2", name_d2, "square"),
    ]:
        fig.add_trace(go.Scatter3d(
            x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
            mode="markers",
            marker=dict(size=5, color=COLORS[key], symbol=symbol,
                        opacity=0.7, line=dict(width=0.5, color="white")),
            name=f"{name} sentences",
            legendgroup=key,
            # Doubled braces keep Plotly's %{x} placeholders out of the f-string.
            hovertemplate=f"{name} sentence<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
        ))

    # ── Centroids (diamonds) ──────────────────────────────────────────────
    for c3, key, name in [
        (c_a, "A", name_a),
        (c_b, "B", name_b),
        (c_d1, "D1", name_d1),
        (c_d2, "D2", name_d2),
    ]:
        fig.add_trace(go.Scatter3d(
            x=[c3[0]], y=[c3[1]], z=[c3[2]],
            mode="markers+text",
            marker=dict(size=10, color=COLORS[key], symbol="diamond",
                        line=dict(width=2, color="white")),
            # "◆" was previously a mis-encoded byte sequence ("β—†").
            text=[f"◆ {name}"],
            textposition="top center",
            textfont=dict(color=COLORS[key], size=11),
            name=f"◆ Centre of {name}",
            legendgroup=key,
            showlegend=True,
            hovertemplate=f"Centre of {name}<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
        ))

    # ── Pole axis (dashed line A↔B) ───────────────────────────────────────
    fig.add_trace(go.Scatter3d(
        x=[c_a[0], c_b[0]], y=[c_a[1], c_b[1]], z=[c_a[2], c_b[2]],
        mode="lines",
        line=dict(color="white", width=3, dash="dash"),
        name=f"Spectrum: {name_a} ↔ {name_b}",
        opacity=0.5,
        hoverinfo="skip",
    ))

    # ── Spokes: discourse centres → pole centres ──────────────────────────
    # Only the colour key and the end points are needed here; the display
    # names are not shown on these legend-less, hover-less lines.
    for c_disc, key in [(c_d1, "D1"), (c_d2, "D2")]:
        for pole_pt in (c_a, c_b):
            fig.add_trace(go.Scatter3d(
                x=[c_disc[0], pole_pt[0]],
                y=[c_disc[1], pole_pt[1]],
                z=[c_disc[2], pole_pt[2]],
                mode="lines",
                line=dict(color=COLORS[key], width=1.5, dash="dot"),
                opacity=0.4,
                showlegend=False,
                hoverinfo="skip",
            ))

    # ── Principal direction arrows ────────────────────────────────────────
    scale = 0.15  # half-length of each direction bar, in plot units
    for c3, ev3, key, name in [
        (c_a, ev_a, "A", name_a),
        (c_b, ev_b, "B", name_b),
        (c_d1, ev_d1, "D1", name_d1),
        (c_d2, ev_d2, "D2", name_d2),
    ]:
        tip = c3 + ev3 * scale
        tail = c3 - ev3 * scale
        fig.add_trace(go.Scatter3d(
            x=[tail[0], tip[0]], y=[tail[1], tip[1]], z=[tail[2], tip[2]],
            mode="lines",
            line=dict(color=COLORS[key], width=6),
            showlegend=False,
            hovertemplate=f"Direction of variation — {name}<extra></extra>",
        ))
        # arrowhead: a small marker at the tip end of the bar
        fig.add_trace(go.Scatter3d(
            x=[tip[0]], y=[tip[1]], z=[tip[2]],
            mode="markers",
            marker=dict(size=5, color=COLORS[key], symbol="diamond"),
            showlegend=False,
            hoverinfo="skip",
        ))

    # ── Layout ────────────────────────────────────────────────────────────
    axis_template = dict(
        backgroundcolor=BG_COLOR,
        gridcolor=GRID_COLOR,
        showbackground=True,
        color=TEXT_COLOR,
        tickfont=dict(size=9, color=TEXT_COLOR),
    )

    fig.update_layout(
        scene=dict(
            xaxis=dict(title=f"Meaning Axis 1 ({pca_ev[0]:.0%})", **axis_template),
            yaxis=dict(title=f"Meaning Axis 2 ({pca_ev[1]:.0%})", **axis_template),
            zaxis=dict(title=f"Meaning Axis 3 ({pca_ev[2]:.0%})", **axis_template),
        ),
        paper_bgcolor=BG_COLOR,
        plot_bgcolor=BG_COLOR,
        font=dict(color=TEXT_COLOR),
        title=dict(
            text=(
                f"Discourse Compass — {name_a} vs {name_b}<br>"
                f"<span style='font-size:12px;color:#5a6488;'>"
                f"Drag to rotate · Scroll to zoom · {sum(pca_ev):.0%} of meaning variation shown</span>"
            ),
            x=0.5,
            font=dict(size=16),
        ),
        legend=dict(
            bgcolor="rgba(19,22,42,0.9)",
            bordercolor=GRID_COLOR,
            borderwidth=1,
            font=dict(size=10, color=TEXT_COLOR),
        ),
        margin=dict(l=0, r=0, t=60, b=0),
        height=620,
    )

    return fig
241
+
242
+
243
+ # ── Core analysis ─────────────────────────────────────────────────────────────
244
+ def run_analysis(text_a, text_b, text_d1, text_d2,
245
+ name_a, name_b, name_d1, name_d2):
246
+ # Default names if blank
247
+ name_a = name_a.strip() or "Pole A"
248
+ name_b = name_b.strip() or "Pole B"
249
+ name_d1 = name_d1.strip() or "Discourse 1"
250
+ name_d2 = name_d2.strip() or "Discourse 2"
251
+
252
+ sents_a = parse_sentences(text_a)
253
+ sents_b = parse_sentences(text_b)
254
+ sents_d1 = parse_sentences(text_d1)
255
+ sents_d2 = parse_sentences(text_d2)
256
 
257
  errors = []
258
+ if not sents_a:
259
+ errors.append(f"{name_a} needs at least 1 sentence.")
260
+ if not sents_b:
261
+ errors.append(f"{name_b} needs at least 1 sentence.")
262
+ if not sents_d1:
263
+ errors.append(f"{name_d1} needs at least 1 sentence.")
264
+ if not sents_d2:
265
+ errors.append(f"{name_d2} needs at least 1 sentence.")
266
  if errors:
267
+ return "⚠ " + " | ".join(errors), None
268
 
 
269
  model = get_model()
270
+ all_sents = sents_a + sents_b + sents_d1 + sents_d2
271
+ all_vecs = model.encode(all_sents, normalize_embeddings=False,
272
+ show_progress_bar=False)
273
+
274
+ na, nb, nd1, nd2 = len(sents_a), len(sents_b), len(sents_d1), len(sents_d2)
275
+ vecs_a = all_vecs[:na]
276
+ vecs_b = all_vecs[na:na + nb]
277
+ vecs_d1 = all_vecs[na + nb:na + nb + nd1]
278
+ vecs_d2 = all_vecs[na + nb + nd1:]
279
+
280
+ # Semantic Hearts (centroids)
281
+ heart_a = semantic_heart(vecs_a)
282
+ heart_b = semantic_heart(vecs_b)
283
+ heart_d1 = semantic_heart(vecs_d1)
284
+ heart_d2 = semantic_heart(vecs_d2)
285
+
286
+ # Thematic Breadth (spread)
287
+ bread_a = thematic_breadth(vecs_a)
288
+ bread_b = thematic_breadth(vecs_b)
289
+ bread_d1 = thematic_breadth(vecs_d1)
290
+ bread_d2 = thematic_breadth(vecs_d2)
291
+ all_breads = [bread_a, bread_b, bread_d1, bread_d2]
292
+
293
+ # Pole Orientation (eigenanalysis)
294
+ pole_vec = heart_b - heart_a
295
+
296
+ def cloud_eigen(vecs):
297
+ vals, evecs = principal_axis(vecs)
298
+ main = evecs[:, 0]
299
+ ang = angle_between(main, pole_vec)
300
+ exp = vals[0] / vals.sum() if vals.sum() > 1e-12 else 0.0
301
+ return main, ang, exp
302
+
303
+ ev_a, ang_a, exp_a = cloud_eigen(vecs_a)
304
+ ev_b, ang_b, exp_b = cloud_eigen(vecs_b)
305
+ ev_d1, ang_d1, exp_d1 = cloud_eigen(vecs_d1)
306
+ ev_d2, ang_d2, exp_d2 = cloud_eigen(vecs_d2)
307
+
308
+ # Centroid projection onto pole axis (scalar position)
309
+ pole_dir = unit(pole_vec)
310
+ proj_d1 = float(np.dot(heart_d1 - heart_a, pole_dir))
311
+ proj_d2 = float(np.dot(heart_d2 - heart_a, pole_dir))
312
+ pole_len = float(np.linalg.norm(pole_vec))
313
+ pct_d1 = proj_d1 / pole_len if pole_len > 1e-12 else 0.5
314
+ pct_d2 = proj_d2 / pole_len if pole_len > 1e-12 else 0.5
315
+
316
+ # PCA to 3D (visualisation only)
317
+ stack = np.vstack([all_vecs, heart_a, heart_b, heart_d1, heart_d2])
318
+ pca = PCA(n_components=3, random_state=42)
319
+ proj_3d = pca.fit_transform(stack)
320
+ pca_ev = pca.explained_variance_ratio_
321
+
322
+ n = len(all_sents)
323
+ pts_a_3d = proj_3d[:na]
324
+ pts_b_3d = proj_3d[na:na + nb]
325
+ pts_d1_3d = proj_3d[na + nb:na + nb + nd1]
326
+ pts_d2_3d = proj_3d[na + nb + nd1:n]
327
+ c_a_3d, c_b_3d = proj_3d[n], proj_3d[n + 1]
328
+ c_d1_3d, c_d2_3d = proj_3d[n + 2], proj_3d[n + 3]
329
+
330
+ # Rotate eigenvectors into 3D PCA space
331
+ ev_a_3d = unit(pca.components_ @ ev_a)
332
+ ev_b_3d = unit(pca.components_ @ ev_b)
333
+ ev_d1_3d = unit(pca.components_ @ ev_d1)
334
+ ev_d2_3d = unit(pca.components_ @ ev_d2)
335
+
336
+ # Build interactive Plotly figure
337
+ fig = build_plotly_figure(
338
+ pts_a_3d, pts_b_3d, pts_d1_3d, pts_d2_3d,
339
+ c_a_3d, c_b_3d, c_d1_3d, c_d2_3d,
340
+ ev_a_3d, ev_b_3d, ev_d1_3d, ev_d2_3d,
341
+ pca_ev,
342
+ name_a, name_b, name_d1, name_d2,
343
+ )
344
 
345
+ # ── Build plain-language report ───────────────────────────────────────
 
346
 
347
+ # Pole separation quality
348
+ pole_cos = float(cosine(heart_a, heart_b))
349
+ if pole_cos > 0.4:
350
+ sep_word = "strong"
351
+ sep_note = "The two poles are clearly distinct β€” results are reliable."
352
+ elif pole_cos > 0.2:
353
+ sep_word = "moderate"
354
+ sep_note = "The poles are reasonably distinct β€” results are meaningful."
355
+ else:
356
+ sep_word = "weak"
357
+ sep_note = "The poles are quite similar β€” consider using more contrasting sentences."
358
+
359
+ # Position bar (pole A = left anchor, pole B = right anchor)
360
def position_bar(pct, width=40, fill="\u2591", marker="\u25cf"):
    """Render a one-line text gauge with a marker at a fractional position.

    The glyphs are spelled as escape sequences so the gauge stays one
    character per cell even if the file is re-saved or served with the wrong
    encoding (the previous literals had been corrupted into multi-character
    mojibake, which broke the fixed-width layout).

    Args:
        pct: Position along the spectrum; 0 is the left end (pole A), 1 is
            the right end (pole B). Values outside [0, 1] are clamped.
        width: Number of fill cells; the result has ``width + 1`` characters.
        fill: Glyph used for the empty cells.
            NOTE(review): restored as U+2591 (light shade); the corrupted
            source is also consistent with U+25A1 (white square) -- confirm.
        marker: Glyph marking the position (U+25CF, black circle).

    Returns:
        The gauge string: ``idx`` fill cells, the marker, then the remaining
        ``width - idx`` fill cells.
    """
    clamped = max(0, min(1, pct))
    idx = int(round(clamped * width))
    return fill * idx + marker + fill * (width - idx)
365
+
366
+ # Plain position description
367
def position_desc(pct, na, nb):
    """Describe a spectrum position in plain words.

    Args:
        pct: Fractional position between the poles (0 = first pole,
            1 = second pole). Values below 0 fall into the first band and
            values above 0.90 (including anything past 1) into the last.
        na: Display name of the pole at position 0.
        nb: Display name of the pole at position 1.

    Returns:
        A short phrase naming the pole the position leans toward.
    """
    # Inclusive upper bound of each band, checked in ascending order.
    bands = (
        (0.10, f"very close to the {na} pole"),
        (0.30, f"closer to {na}"),
        (0.45, f"slightly leaning toward {na}"),
        (0.55, f"roughly midway between {na} and {nb}"),
        (0.70, f"slightly leaning toward {nb}"),
        (0.90, f"closer to {nb}"),
    )
    for upper, phrase in bands:
        if pct <= upper:
            return phrase
    return f"very close to the {nb} pole"
382
+
383
+ desc_d1 = position_desc(pct_d1, name_a, name_b)
384
+ desc_d2 = position_desc(pct_d2, name_a, name_b)
385
+
386
+ # Gap between texts
387
+ gap = abs(pct_d1 - pct_d2)
388
+ if gap < 0.05:
389
+ gap_desc = "no meaningful difference in position"
390
+ elif gap < 0.15:
391
+ gap_desc = "a small difference in position"
392
+ elif gap < 0.30:
393
+ gap_desc = "a moderate difference in position"
394
+ elif gap < 0.50:
395
+ gap_desc = "a substantial difference in position"
396
+ else:
397
+ gap_desc = "a very large difference in position"
398
+
399
+ # Cluster tightness as reliability
400
def reliability_label(spread, all_spreads):
    """Turn a cluster's spread into a plain-language reliability statement.

    The spread is min-max normalised against ``all_spreads`` and bucketed
    into quarters. When every spread is equal the normalised value is taken
    as 0.5 so that no division by zero occurs.

    The separator in each label is a real em dash (U+2014); the previous
    literals contained its mis-decoded form and showed garbage characters in
    the report.

    Args:
        spread: Spread of the cluster being described.
        all_spreads: Non-empty collection of spreads used as the reference
            range.

    Returns:
        One of four descriptive sentences (without a trailing full stop).

    Raises:
        ValueError: If ``all_spreads`` is empty.
    """
    lowest, highest = min(all_spreads), max(all_spreads)
    rel = (spread - lowest) / (highest - lowest) if highest > lowest else 0.5
    if rel < 0.25:
        return "very consistent — position score is highly reliable"
    if rel < 0.50:
        return "fairly consistent — position score is reliable"
    if rel < 0.75:
        return "somewhat varied — position score is an average across different angles"
    return "wide-ranging — position score averages over quite different sentences"
411
 
412
+ rel_d1 = reliability_label(bread_d1, all_breads)
413
+ rel_d2 = reliability_label(bread_d2, all_breads)
414
 
415
+ # Axis relevance (brief caveat only)
416
def axis_relevance_note(angle):
    """Explain how a cloud's main direction of variation relates to the axis.

    Args:
        angle: Angle in degrees between the cloud's main direction and the
            pole axis.

    Returns:
        A phrase for angles below 30, below 60, or anything else.
    """
    # Exclusive upper bounds, checked in ascending order.
    thresholds = (
        (30, "sentences differ mainly along the pole spectrum"),
        (60, "sentences differ partly along the spectrum, partly on other dimensions"),
    )
    for limit, note in thresholds:
        if angle < limit:
            return note
    return "sentences differ mainly on dimensions unrelated to this spectrum"
423
 
424
+ note_d1 = axis_relevance_note(ang_d1)
425
+ note_d2 = axis_relevance_note(ang_d2)
426
 
427
+ # Overall verdict
428
+ closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
429
+ closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
430
+ if gap < 0.05:
431
+ verdict = (f"No clear difference: {name_d1} and {name_d2} occupy very "
432
+ f"similar positions on the {name_a}↔{name_b} spectrum.")
433
  else:
434
+ verdict = (f"{closer_to_a} aligns more closely with {name_a}; "
435
+ f"{closer_to_b} aligns more closely with {name_b}. "
436
+ f"There is {gap_desc} between them ({gap:.0%} of the full spectrum).")
 
 
437
 
438
+ # Caveats
439
  caveats = []
440
+ if sep_word == "weak":
441
+ caveats.append(f"Pole separation is weak β€” the two poles are not very distinct in meaning space. "
442
+ f"Try adding more contrasting sentences to each pole.")
443
+ if bread_d1 > bread_b and bread_d1 > bread_a:
444
+ caveats.append(f"{name_d1} is more wide-ranging than either pole corpus β€” "
445
+ f"its position score averages over quite varied content.")
446
+ if bread_d2 > bread_b and bread_d2 > bread_a:
447
+ caveats.append(f"{name_d2} is more wide-ranging than either pole corpus β€” "
448
+ f"its position score averages over quite varied content.")
449
+
450
  W = 62
451
+ report_lines = [
452
+ f"{'═' * W}",
453
+ f" DISCOURSE COMPASS β€” Results",
454
+ f"{'═' * W}",
455
+ f"",
456
+ f" AXIS: {name_a} ←{'─' * 16}β†’ {name_b}",
457
+ f" Pole separation: {sep_word} β€” {sep_note}",
458
+ f" ({na} sentences in {name_a} pole Β· {nb} in {name_b} pole)",
459
+ f"",
460
+ f"{'─' * W}",
461
+ f" WHERE EACH TEXT SITS ON THE SPECTRUM",
462
+ f"{'─' * W}",
463
+ f" 0% = {name_a} pole 100% = {name_b} pole",
464
+ f"",
465
+ f" {name_a} pole",
466
+ f" {'β–‘' * 20}●{'β–‘' * 20} (0%)",
467
+ f"",
468
+ f" {name_d1} ({nd1} sentences)",
469
+ f" {position_bar(pct_d1)} ({pct_d1:.0%})",
470
+ f" β†’ {desc_d1}",
471
+ f"",
472
+ f" {name_d2} ({nd2} sentences)",
473
+ f" {position_bar(pct_d2)} ({pct_d2:.0%})",
474
+ f" β†’ {desc_d2}",
475
+ f"",
476
+ f" {name_b} pole",
477
+ f" {'β–‘' * 20}●{'β–‘' * 20} (100%)",
478
+ f"",
479
+ f" Gap between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
480
+ f" β†’ {gap_desc.capitalize()}.",
481
+ f"",
482
+ f"{'─' * W}",
483
+ f" HOW RELIABLY DO THE SENTENCES CLUSTER?",
484
+ f"{'─' * W}",
485
+ f" A tight cluster means all sentences point in the same",
486
+ f" direction β€” the position score is a reliable summary.",
487
+ f" A loose cluster means sentences pull in different",
488
+ f" directions β€” the score is an average and less decisive.",
489
+ f"",
490
+ f" {name_d1}: {rel_d1}.",
491
+ f" {name_d2}: {rel_d2}.",
492
+ f"",
493
+ f" For reference β€” how wide-ranging are the pole corpora?",
494
+ f" {name_a} pole: {breadth_label(bread_a, all_breads)}",
495
+ f" {name_b} pole: {breadth_label(bread_b, all_breads)}",
496
+ f"",
497
+ f"{'─' * W}",
498
+ f" AXIS ALIGNMENT NOTE",
499
+ f"{'─' * W}",
500
+ f" Do sentences within each text vary along the pole",
501
+ f" spectrum, or mainly on unrelated dimensions?",
502
+ f"",
503
+ f" {name_d1}: {note_d1}.",
504
+ f" {name_d2}: {note_d2}.",
505
+ f"",
506
  ]
507
 
508
  if caveats:
509
+ report_lines += [
510
+ f"{'─' * W}",
511
+ f" ⚠ CAVEATS",
512
+ f"{'─' * W}",
513
+ ]
514
  for c in caveats:
515
+ report_lines.append(f" β€’ {c}")
516
+ report_lines.append(f"")
517
 
518
+ report_lines += [
519
+ f"{'─' * W}",
520
+ f" SUMMARY",
521
+ f"{'─' * W}",
522
  f" {verdict}",
523
+ f"",
524
+ f"{'═' * W}",
525
+ f" All measurements use the full {MODEL_DIM}-dimensional meaning",
526
+ f" space of {MODEL_NAME}. The 3D map is a simplified view",
527
+ f" for visual orientation β€” rotate and zoom it above.",
528
+ f"{'═' * W}",
529
  ]
530
+ report = "\n".join(report_lines)
531
 
532
+ return report, fig
 
 
 
 
 
 
 
 
 
 
 
533
 
 
534
 
535
+ # ── Demo placeholders ─────────────────────────────────────────────────────────
536
# Example sentences for pole A, one per line with no trailing newline.
PLACEHOLDER_A = "\n".join([
    "The economy is growing rapidly.",
    "Unemployment is at a record low.",
    "Businesses are thriving and profits are up.",
    "Consumer spending is at an all-time high.",
])
541
 
542
# Example sentences for pole B, one per line with no trailing newline.
PLACEHOLDER_B = "\n".join([
    "Climate change is an existential crisis.",
    "We must reduce carbon emissions immediately.",
    "Renewable energy is the only sustainable future.",
    "The planet is warming at an alarming rate.",
])
547
 
548
# Example sentences for the first text, one per line with no trailing newline.
PLACEHOLDER_D1 = "\n".join([
    "The stock market reached a new record today.",
    "Interest rates are being adjusted to control inflation.",
    "Foreign direct investment increased by 12% this quarter.",
])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
# Example sentences for the second text, one per line with no trailing newline.
PLACEHOLDER_D2 = "\n".join([
    "Arctic ice sheets are melting faster than predicted.",
    "Scientists warn of irreversible tipping points.",
    "Carbon capture technology is advancing but not fast enough.",
])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
 
558
+ # ── Explainer content ─────────────────────────────────────────────────────────
559
# Markdown explainer text describing how the tool works.
# The em dashes below are real U+2014 characters; they had previously been
# mis-decoded into multi-character garbage that would be shown to the user.
EXPLAINER_HOW = """
### How does this tool work?

Every sentence carries meaning. This tool uses an AI language model to translate
each sentence into a **point in meaning-space** — an invisible map where sentences
that mean similar things sit close together, and sentences with very different
meanings sit far apart.

You define **two poles** by giving example sentences for each — for instance,
*economic growth* vs *climate crisis*. These poles create a spectrum.

Then you enter two sets of text (the "discourses") and the tool measures
where each one sits on that spectrum. The results tell you:

- **Which pole each text is closer to** (and by how much)
- **How spread out** each set of sentences is (focused vs wide-ranging)
- **What direction** the sentences vary in (along the spectrum, or off to the side)

The 3D map lets you **see** the results — each dot is a sentence, and you can
rotate and zoom to explore how they cluster.
"""
580
 
581
  # ── CSS ───────────────────────────────────────────────────────────────────────
582
  CSS = """
 
595
  border: none !important;
596
  font-weight: 800 !important;
597
  font-size: 1.05rem !important;
598
+ letter-spacing: 0.03em !important;
599
  border-radius: 10px !important; }
600
  .run-btn:hover { opacity: 0.86 !important; }
601
  .output-text textarea { font-family: 'Courier New', monospace !important;
602
  font-size: 0.79rem !important;
603
  color: #7dd8f8 !important;
604
+ line-height: 1.55 !important; }
605
  h1, h2, h3, h4 { color: #dde4f8 !important; }
606
+ .gr-accordion { border: 1px solid #1c2040 !important;
607
+ border-radius: 10px !important; }
608
+ .name-box input { font-weight: 700 !important;
609
+ font-size: 0.95rem !important; }
610
  """
611
 
 
 
 
 
 
 
612
 
613
  # ── UI ────────────────────────────────────────────────────────────────────────
614
# Gradio layout: header, pole inputs, text inputs, run button, 3D map, report.
# Emoji and punctuation in the visible labels are real Unicode characters;
# the previous literals were mis-decoded and rendered as garbage.
with gr.Blocks(title="Discourse Compass") as demo:

    # Header
    gr.HTML("""
    <div style="padding: 8px 0 20px 0;">
      <h1 style="color:#dde4f8; font-size:2rem; font-weight:900;
                 margin-bottom:6px; letter-spacing:-0.5px;">
        🧭 Discourse Compass
      </h1>
      <p style="color:#5a6488; font-size:0.92rem; margin:0; max-width:700px;">
        Define two semantic poles with example sentences, then find out where
        any text sits between them — with plain-language explanations.
      </p>
    </div>""")

    with gr.Accordion("💡 How does this work? (click to read)", open=False):
        gr.Markdown(EXPLAINER_HOW)

    gr.HTML("<hr style='border-color:#1c2040; margin: 8px 0 20px 0;'>")

    # Step 1: the two poles that define the spectrum
    gr.HTML("""
    <h3 style="color:#dde4f8; margin-bottom:4px;">Step 1 — Define your two poles</h3>
    <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
      Enter several sentences that represent each extreme. One sentence per line.
    </p>""")

    with gr.Row():
        with gr.Column():
            gr.HTML("<span style='color:#5aa8ff;font-weight:700;'>🔵 POLE A</span>")
            name_a_box = gr.Textbox(label="Name for Pole A",
                                    value="Economic Growth",
                                    elem_classes=["name-box"])
            pole_a = gr.Textbox(label="Sentences — one per line",
                                lines=7, value=PLACEHOLDER_A)
        with gr.Column():
            gr.HTML("<span style='color:#ff6b6b;font-weight:700;'>🔴 POLE B</span>")
            name_b_box = gr.Textbox(label="Name for Pole B",
                                    value="Climate Crisis",
                                    elem_classes=["name-box"])
            pole_b = gr.Textbox(label="Sentences — one per line",
                                lines=7, value=PLACEHOLDER_B)

    gr.HTML("<hr style='border-color:#1c2040; margin: 20px 0;'>")

    # Step 2: the two texts to place on the spectrum
    gr.HTML("""
    <h3 style="color:#dde4f8; margin-bottom:4px;">Step 2 — Enter the texts to analyse</h3>
    <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
      These are the texts whose position between the poles you want to measure.
    </p>""")

    with gr.Row():
        with gr.Column():
            gr.HTML("<span style='color:#3dd6a3;font-weight:700;'>🟢 TEXT 1</span>")
            name_d1_box = gr.Textbox(label="Name for Text 1",
                                     value="Financial News",
                                     elem_classes=["name-box"])
            disc1 = gr.Textbox(label="Sentences — one per line",
                               lines=5, value=PLACEHOLDER_D1)
        with gr.Column():
            gr.HTML("<span style='color:#ffcc55;font-weight:700;'>🟡 TEXT 2</span>")
            name_d2_box = gr.Textbox(label="Name for Text 2",
                                     value="Climate Reporting",
                                     elem_classes=["name-box"])
            disc2 = gr.Textbox(label="Sentences — one per line",
                               lines=5, value=PLACEHOLDER_D2)

    # Run button.
    # Each gr.HTML call is its own component, so an opening <div> in one call
    # and a closing </div> in another can never wrap the button; it only
    # produced unbalanced markup. A single self-contained spacer keeps the
    # vertical margin with valid HTML.
    gr.HTML("<div style='margin: 24px 0 8px 0;'></div>")
    run_btn = gr.Button("⚡ Run Analysis", variant="primary",
                        size="lg", elem_classes=["run-btn"])

    gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")

    # Results: interactive map
    gr.HTML("""
    <h3 style="color:#dde4f8; margin: 0 0 4px 0;">📊 Interactive Semantic Map</h3>
    <p style="color:#5a6488; font-size:0.84rem; margin:0 0 12px 0;">
      Each dot is a sentence. Diamonds (◆) mark the centre of each group.
      <strong>Drag to rotate · scroll to zoom · click legend items to toggle.</strong>
    </p>""")

    plot_out = gr.Plot(label="Semantic Map")

    gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")

    # Results: text report
    gr.HTML("""
    <h3 style="color:#dde4f8; margin: 0 0 4px 0;">📋 Results Report</h3>
    <p style="color:#5a6488; font-size:0.84rem; margin:0 0 10px 0;">
      Plain-language summary of every measurement.
    </p>""")

    text_out = gr.Textbox(label="Results", lines=42, interactive=False,
                          elem_classes=["output-text"])

    # Wire the button to the analysis; the output order matches the
    # (report, figure) pair that run_analysis returns.
    run_btn.click(
        fn=run_analysis,
        inputs=[pole_a, pole_b, disc1, disc2,
                name_a_box, name_b_box, name_d1_box, name_d2_box],
        outputs=[text_out, plot_out],
    )

    gr.HTML(f"""
    <p style="color:#1e2440; font-size:0.74rem; text-align:center;
              margin-top:28px; padding-bottom:12px;">
      All measurements use the full {MODEL_DIM}-dimensional meaning space of
      <code>{MODEL_NAME}</code>.
      The 3D map is a simplified view (PCA) for orientation only.
    </p>""")
726
 
727
  if __name__ == "__main__":