neovalle committed on
Commit
f5be592
·
verified ·
1 Parent(s): e8fdf77

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +450 -522
app.py CHANGED
@@ -1,10 +1,9 @@
1
  """
2
- Discourse Compass — Gradio App for Linguists & General Public
3
  =============================================================
4
- • Interactive 3D Plotly scatter (rotate, zoom, pan)
5
- • Custom naming for poles and discourses
6
- • Plain-language results for non-technical users
7
- • Sentence embeddings via all-mpnet-base-v2 (768-dim)
8
  """
9
 
10
  import gradio as gr
@@ -12,11 +11,9 @@ import numpy as np
12
  import plotly.graph_objects as go
13
  from sentence_transformers import SentenceTransformer
14
  from sklearn.decomposition import PCA
15
- from scipy.spatial.distance import cosine, euclidean
16
 
17
  # ── Model ─────────────────────────────────────────────────────────────────────
18
  MODEL_NAME = "all-mpnet-base-v2"
19
- MODEL_DIM = 768
20
  _model = None
21
 
22
  def get_model():
@@ -25,7 +22,7 @@ def get_model():
25
  _model = SentenceTransformer(MODEL_NAME)
26
  return _model
27
 
28
- # ── Maths helpers ─────────────────────────────────────────────────────────────
29
  def parse_sentences(text):
30
  return [s.strip() for s in text.strip().splitlines() if s.strip()]
31
 
@@ -37,448 +34,386 @@ def angle_between(u, v):
37
  c = abs(float(np.dot(unit(u), unit(v))))
38
  return float(np.degrees(np.arccos(min(c, 1.0))))
39
 
40
- def thematic_breadth(vecs):
 
41
  return float(np.linalg.norm(vecs - vecs.mean(axis=0), "fro"))
42
 
43
- def principal_axis(vecs):
 
44
  if vecs.shape[0] < 2:
45
- return np.zeros(vecs.shape[1]), np.eye(vecs.shape[1])
46
- vals, evecs = np.linalg.eigh(np.cov(vecs, rowvar=False))
47
- order = np.argsort(vals)[::-1]
48
- return vals[order], evecs[:, order]
49
-
50
- def semantic_heart(vecs):
51
- return vecs.mean(axis=0)
52
-
53
- # ── Plain-language interpretation helpers ─────────────────────────────────────
54
- def breadth_label(score, all_scores):
55
- mn, mx = min(all_scores), max(all_scores)
56
- if mx == mn:
57
- return "moderate"
58
- r = (score - mn) / (mx - mn)
59
- if r < 0.33:
60
- return "tightly focused"
61
- if r < 0.66:
62
- return "moderately varied"
63
- return "wide-ranging"
64
-
65
- def orientation_label(angle):
66
- if angle < 20:
67
- return "closely tracks the pole-to-pole spectrum"
68
- if angle < 45:
69
- return "partly follows the pole-to-pole spectrum"
70
- if angle < 70:
71
- return "drifts away from the pole-to-pole spectrum"
72
- return "varies independently of the pole-to-pole spectrum"
73
-
74
- def strength_label(pct):
75
- if pct > 0.6:
76
- return "very consistent β€” sentences cluster in one direction"
77
- if pct > 0.35:
78
- return "moderately consistent"
79
- return "diverse β€” sentences spread in many directions"
80
-
81
- def pull_label(cos_a, cos_b, name_a, name_b):
82
- diff = abs(cos_a - cos_b)
83
- closer = name_a if cos_a < cos_b else name_b
84
- if diff < 0.05:
85
- return f"sits roughly halfway between {name_a} and {name_b}"
86
- elif diff < 0.15:
87
- return f"leans toward {closer}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  else:
89
- return f"clearly closer to {closer}"
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # ── Plotly colour palette ─────────────────────────────────────────────────────
93
- COLORS = {
94
- "A": "#5aa8ff",
95
- "B": "#ff6b6b",
96
- "D1": "#3dd6a3",
97
- "D2": "#ffcc55",
98
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
- BG_COLOR = "#0d0f1c"
101
- GRID_COLOR = "#1c2040"
102
- TEXT_COLOR = "#cdd5f0"
 
 
 
 
 
 
 
 
 
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # ── Interactive Plotly 3D renderer ────────────────────────────────────────────
106
- def build_plotly_figure(
107
- pts_a, pts_b, pts_d1, pts_d2,
108
- c_a, c_b, c_d1, c_d2,
109
- ev_a, ev_b, ev_d1, ev_d2,
110
- pca_ev,
111
- name_a, name_b, name_d1, name_d2,
112
- ):
113
- fig = go.Figure()
114
-
115
- # ── Sentence dots ─────────────────────────────────────────────────────
116
- for pts, key, name, symbol in [
117
- (pts_a, "A", name_a, "circle"),
118
- (pts_b, "B", name_b, "circle"),
119
- (pts_d1, "D1", name_d1, "square"),
120
- (pts_d2, "D2", name_d2, "square"),
121
- ]:
122
- fig.add_trace(go.Scatter3d(
123
- x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
124
  mode="markers",
125
- marker=dict(size=5, color=COLORS[key], symbol=symbol,
126
- opacity=0.7, line=dict(width=0.5, color="white")),
127
- name=f"{name} sentences",
128
- legendgroup=key,
129
- hovertemplate=f"{name} sentence<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
130
- ))
131
-
132
- # ── Centroids (diamonds) ──────────────────────────────────────────────
133
- for c3, key, name in [
134
- (c_a, "A", name_a),
135
- (c_b, "B", name_b),
136
- (c_d1, "D1", name_d1),
137
- (c_d2, "D2", name_d2),
138
- ]:
139
- fig.add_trace(go.Scatter3d(
140
- x=[c3[0]], y=[c3[1]], z=[c3[2]],
141
  mode="markers+text",
142
- marker=dict(size=10, color=COLORS[key], symbol="diamond",
143
- line=dict(width=2, color="white")),
144
- text=[f"β—† {name}"],
145
  textposition="top center",
146
- textfont=dict(color=COLORS[key], size=11),
147
- name=f"β—† Centre of {name}",
148
- legendgroup=key,
149
- showlegend=True,
150
- hovertemplate=f"Centre of {name}<br>(%{{x:.3f}}, %{{y:.3f}}, %{{z:.3f}})<extra></extra>",
151
- ))
152
-
153
- # ── Pole axis (dashed line A↔B) ───────────────────────────────────────
154
- fig.add_trace(go.Scatter3d(
155
- x=[c_a[0], c_b[0]], y=[c_a[1], c_b[1]], z=[c_a[2], c_b[2]],
156
- mode="lines",
157
- line=dict(color="white", width=3, dash="dash"),
158
- name=f"Spectrum: {name_a} ↔ {name_b}",
159
- opacity=0.5,
160
- hoverinfo="skip",
161
- ))
162
-
163
- # ── Spokes: discourse centres β†’ pole centres ──────────────────────────
164
- for c_disc, key, dname in [(c_d1, "D1", name_d1), (c_d2, "D2", name_d2)]:
165
- for pole_pt, pname in [(c_a, name_a), (c_b, name_b)]:
166
- fig.add_trace(go.Scatter3d(
167
- x=[c_disc[0], pole_pt[0]],
168
- y=[c_disc[1], pole_pt[1]],
169
- z=[c_disc[2], pole_pt[2]],
170
- mode="lines",
171
- line=dict(color=COLORS[key], width=1.5, dash="dot"),
172
- opacity=0.4,
173
- showlegend=False,
174
- hoverinfo="skip",
175
- ))
176
-
177
- # ── Principal direction arrows ────────────────────────────────────────
178
- scale = 0.15
179
- for c3, ev3, key, name in [
180
- (c_a, ev_a, "A", name_a),
181
- (c_b, ev_b, "B", name_b),
182
- (c_d1, ev_d1, "D1", name_d1),
183
- (c_d2, ev_d2, "D2", name_d2),
184
- ]:
185
- tip = c3 + ev3 * scale
186
- tail = c3 - ev3 * scale
187
- fig.add_trace(go.Scatter3d(
188
- x=[tail[0], tip[0]], y=[tail[1], tip[1]], z=[tail[2], tip[2]],
189
- mode="lines",
190
- line=dict(color=COLORS[key], width=6),
191
  showlegend=False,
192
- hovertemplate=f"Direction of variation β€” {name}<extra></extra>",
193
- ))
194
- # arrowhead
195
- fig.add_trace(go.Scatter3d(
196
- x=[tip[0]], y=[tip[1]], z=[tip[2]],
197
- mode="markers",
198
- marker=dict(size=5, color=COLORS[key], symbol="diamond"),
 
199
  showlegend=False,
200
- hoverinfo="skip",
201
- ))
202
-
203
- # ── Layout ────────────────────────────────────────────────────────────
204
- axis_template = dict(
205
- backgroundcolor=BG_COLOR,
206
- gridcolor=GRID_COLOR,
207
- showbackground=True,
208
- color=TEXT_COLOR,
209
- tickfont=dict(size=9, color=TEXT_COLOR),
210
- )
211
 
 
212
  fig.update_layout(
213
- scene=dict(
214
- xaxis=dict(title=f"Meaning Axis 1 ({pca_ev[0]:.0%})", **axis_template),
215
- yaxis=dict(title=f"Meaning Axis 2 ({pca_ev[1]:.0%})", **axis_template),
216
- zaxis=dict(title=f"Meaning Axis 3 ({pca_ev[2]:.0%})", **axis_template),
217
- ),
218
- paper_bgcolor=BG_COLOR,
219
- plot_bgcolor=BG_COLOR,
220
- font=dict(color=TEXT_COLOR),
221
  title=dict(
222
- text=(
223
- f"Discourse Compass β€” {name_a} vs {name_b}<br>"
224
- f"<span style='font-size:12px;color:#5a6488;'>"
225
- f"Drag to rotate Β· Scroll to zoom Β· {sum(pca_ev):.0%} of meaning variation shown</span>"
226
- ),
227
- x=0.5,
228
- font=dict(size=16),
229
  ),
230
- legend=dict(
231
- bgcolor="rgba(19,22,42,0.9)",
232
- bordercolor=GRID_COLOR,
233
- borderwidth=1,
234
- font=dict(size=10, color=TEXT_COLOR),
 
 
 
 
 
 
235
  ),
236
- margin=dict(l=0, r=0, t=60, b=0),
237
- height=620,
 
 
 
 
 
238
  )
239
 
240
- return fig
241
-
242
-
243
- # ── Core analysis ─────────────────────────────────────────────────────────────
244
- def run_analysis(text_a, text_b, text_d1, text_d2,
245
- name_a, name_b, name_d1, name_d2):
246
- # Default names if blank
247
- name_a = name_a.strip() or "Pole A"
248
- name_b = name_b.strip() or "Pole B"
249
- name_d1 = name_d1.strip() or "Discourse 1"
250
- name_d2 = name_d2.strip() or "Discourse 2"
251
-
252
- sents_a = parse_sentences(text_a)
253
- sents_b = parse_sentences(text_b)
254
- sents_d1 = parse_sentences(text_d1)
255
- sents_d2 = parse_sentences(text_d2)
256
-
257
- errors = []
258
- if not sents_a:
259
- errors.append(f"{name_a} needs at least 1 sentence.")
260
- if not sents_b:
261
- errors.append(f"{name_b} needs at least 1 sentence.")
262
- if not sents_d1:
263
- errors.append(f"{name_d1} needs at least 1 sentence.")
264
- if not sents_d2:
265
- errors.append(f"{name_d2} needs at least 1 sentence.")
266
- if errors:
267
- return "⚠ " + " | ".join(errors), None
268
-
269
- model = get_model()
270
- all_sents = sents_a + sents_b + sents_d1 + sents_d2
271
- all_vecs = model.encode(all_sents, normalize_embeddings=False,
272
- show_progress_bar=False)
273
-
274
- na, nb, nd1, nd2 = len(sents_a), len(sents_b), len(sents_d1), len(sents_d2)
275
- vecs_a = all_vecs[:na]
276
- vecs_b = all_vecs[na:na + nb]
277
- vecs_d1 = all_vecs[na + nb:na + nb + nd1]
278
- vecs_d2 = all_vecs[na + nb + nd1:]
279
-
280
- # Semantic Hearts (centroids)
281
- heart_a = semantic_heart(vecs_a)
282
- heart_b = semantic_heart(vecs_b)
283
- heart_d1 = semantic_heart(vecs_d1)
284
- heart_d2 = semantic_heart(vecs_d2)
285
-
286
- # Thematic Breadth (spread)
287
- bread_a = thematic_breadth(vecs_a)
288
- bread_b = thematic_breadth(vecs_b)
289
- bread_d1 = thematic_breadth(vecs_d1)
290
- bread_d2 = thematic_breadth(vecs_d2)
291
- all_breads = [bread_a, bread_b, bread_d1, bread_d2]
292
-
293
- # Pole Orientation (eigenanalysis)
294
- pole_vec = heart_b - heart_a
295
-
296
- def cloud_eigen(vecs):
297
- vals, evecs = principal_axis(vecs)
298
- main = evecs[:, 0]
299
- ang = angle_between(main, pole_vec)
300
- exp = vals[0] / vals.sum() if vals.sum() > 1e-12 else 0.0
301
- return main, ang, exp
302
-
303
- ev_a, ang_a, exp_a = cloud_eigen(vecs_a)
304
- ev_b, ang_b, exp_b = cloud_eigen(vecs_b)
305
- ev_d1, ang_d1, exp_d1 = cloud_eigen(vecs_d1)
306
- ev_d2, ang_d2, exp_d2 = cloud_eigen(vecs_d2)
307
-
308
- # Centroid projection onto pole axis (scalar position)
309
- pole_dir = unit(pole_vec)
310
- proj_d1 = float(np.dot(heart_d1 - heart_a, pole_dir))
311
- proj_d2 = float(np.dot(heart_d2 - heart_a, pole_dir))
312
- pole_len = float(np.linalg.norm(pole_vec))
313
- pct_d1 = proj_d1 / pole_len if pole_len > 1e-12 else 0.5
314
- pct_d2 = proj_d2 / pole_len if pole_len > 1e-12 else 0.5
315
-
316
- # PCA to 3D (visualisation only)
317
- stack = np.vstack([all_vecs, heart_a, heart_b, heart_d1, heart_d2])
318
- pca = PCA(n_components=3, random_state=42)
319
- proj_3d = pca.fit_transform(stack)
320
- pca_ev = pca.explained_variance_ratio_
321
-
322
- n = len(all_sents)
323
- pts_a_3d = proj_3d[:na]
324
- pts_b_3d = proj_3d[na:na + nb]
325
- pts_d1_3d = proj_3d[na + nb:na + nb + nd1]
326
- pts_d2_3d = proj_3d[na + nb + nd1:n]
327
- c_a_3d, c_b_3d = proj_3d[n], proj_3d[n + 1]
328
- c_d1_3d, c_d2_3d = proj_3d[n + 2], proj_3d[n + 3]
329
-
330
- # Rotate eigenvectors into 3D PCA space
331
- ev_a_3d = unit(pca.components_ @ ev_a)
332
- ev_b_3d = unit(pca.components_ @ ev_b)
333
- ev_d1_3d = unit(pca.components_ @ ev_d1)
334
- ev_d2_3d = unit(pca.components_ @ ev_d2)
335
-
336
- # Build interactive Plotly figure
337
- fig = build_plotly_figure(
338
- pts_a_3d, pts_b_3d, pts_d1_3d, pts_d2_3d,
339
- c_a_3d, c_b_3d, c_d1_3d, c_d2_3d,
340
- ev_a_3d, ev_b_3d, ev_d1_3d, ev_d2_3d,
341
- pca_ev,
342
- name_a, name_b, name_d1, name_d2,
343
- )
344
-
345
- # ── Build plain-language report ───────────────────────────────────────
346
- cos_d1_a = float(cosine(heart_d1, heart_a))
347
- cos_d1_b = float(cosine(heart_d1, heart_b))
348
- cos_d2_a = float(cosine(heart_d2, heart_a))
349
- cos_d2_b = float(cosine(heart_d2, heart_b))
350
-
351
- pole_cos = float(cosine(heart_a, heart_b))
352
- if pole_cos > 0.4:
353
- pole_sep_txt = "well separated β€” they represent clearly different viewpoints"
354
- elif pole_cos > 0.2:
355
- pole_sep_txt = "moderately separated"
356
- else:
357
- pole_sep_txt = "quite close β€” consider using more contrasting sentences"
358
-
359
- def position_bar(pct, width=30):
360
- """Visual bar showing where a discourse sits on the A↔B spectrum."""
361
- pos = max(0, min(1, pct))
362
- idx = int(round(pos * width))
363
- return "β”‚" + "─" * idx + "●" + "─" * (width - idx) + "β”‚"
364
-
365
- report_lines = [
366
- f"{'═' * 62}",
367
- f" DISCOURSE COMPASS β€” Results",
368
- f"{'═' * 62}",
369
- f"",
370
- f" SETUP",
371
- f" ─────────────────────────────────────────────────────────",
372
- f" Pole: {name_a} ({na} sentences)",
373
- f" Pole: {name_b} ({nb} sentences)",
374
- f" Text: {name_d1} ({nd1} sentences)",
375
- f" Text: {name_d2} ({nd2} sentences)",
376
- f" Pole separation: {pole_sep_txt}",
377
- f"",
378
- f"",
379
- f" WHERE EACH TEXT SITS ON THE SPECTRUM",
380
- f" ─────────────────────────────────────────────────────────",
381
- f" Think of a line stretching from {name_a} on the left",
382
- f" to {name_b} on the right. Where does each text land?",
383
- f"",
384
- f" {name_a:<20s} {name_b}",
385
- f" {name_d1}:",
386
- f" {position_bar(pct_d1)} ({pct_d1:.0%} toward {name_b})",
387
- f" β†’ {pull_label(cos_d1_a, cos_d1_b, name_a, name_b)}",
388
- f"",
389
- f" {name_d2}:",
390
- f" {position_bar(pct_d2)} ({pct_d2:.0%} toward {name_b})",
391
- f" β†’ {pull_label(cos_d2_a, cos_d2_b, name_a, name_b)}",
392
- f"",
393
- f"",
394
- f" HOW SPREAD OUT IS EACH SET OF SENTENCES?",
395
- f" ─────────────────────────────────────────────────────────",
396
- f" Low = all sentences say roughly the same thing",
397
- f" High = sentences cover many different angles",
398
- f"",
399
- f" {name_a:<22s} {bread_a:.3f} β€” {breadth_label(bread_a, all_breads)}",
400
- f" {name_b:<22s} {bread_b:.3f} β€” {breadth_label(bread_b, all_breads)}",
401
- f" {name_d1:<22s} {bread_d1:.3f} β€” {breadth_label(bread_d1, all_breads)}",
402
- f" {name_d2:<22s} {bread_d2:.3f} β€” {breadth_label(bread_d2, all_breads)}",
403
- f"",
404
- f"",
405
- f" WHAT DIRECTION DOES EACH TEXT VARY IN?",
406
- f" ─────────────────────────────────────────────────────────",
407
- f" This asks: when sentences in a group differ from each",
408
- f" other, do they differ along the {name_a}↔{name_b}",
409
- f" spectrum, or in some unrelated direction?",
410
- f"",
411
- f" 0Β° = variation runs along the spectrum",
412
- f" 90Β° = variation runs in a completely different direction",
413
- f"",
414
- f" {name_a:<22s} {ang_a:5.1f}Β° β€” {orientation_label(ang_a)}",
415
- f" {name_b:<22s} {ang_b:5.1f}Β° β€” {orientation_label(ang_b)}",
416
- f" {name_d1:<22s} {ang_d1:5.1f}Β° β€” {orientation_label(ang_d1)}",
417
- f" {name_d2:<22s} {ang_d2:5.1f}Β° β€” {orientation_label(ang_d2)}",
418
- f"",
419
- f" How consistent is each group?",
420
- f" {name_a:<22s} {exp_a:.0%} β€” {strength_label(exp_a)}",
421
- f" {name_b:<22s} {exp_b:.0%} β€” {strength_label(exp_b)}",
422
- f" {name_d1:<22s} {exp_d1:.0%} β€” {strength_label(exp_d1)}",
423
- f" {name_d2:<22s} {exp_d2:.0%} β€” {strength_label(exp_d2)}",
424
- f"",
425
- f"{'═' * 62}",
426
- f" All measurements use the full {MODEL_DIM}-dimensional meaning",
427
- f" space of {MODEL_NAME}. The 3D map is a simplified view",
428
- f" for visual orientation β€” rotate and zoom it above.",
429
- f"{'═' * 62}",
430
- ]
431
- report = "\n".join(report_lines)
432
-
433
  return report, fig
434
 
435
 
436
- # ── Demo placeholders ─────────────────────────────────────────────────────────
437
- PLACEHOLDER_A = """\
438
- The economy is growing rapidly.
439
- Unemployment is at a record low.
440
- Businesses are thriving and profits are up.
441
- Consumer spending is at an all-time high."""
442
-
443
- PLACEHOLDER_B = """\
444
- Climate change is an existential crisis.
445
- We must reduce carbon emissions immediately.
446
- Renewable energy is the only sustainable future.
447
- The planet is warming at an alarming rate."""
448
-
449
- PLACEHOLDER_D1 = """\
450
- The stock market reached a new record today.
451
- Interest rates are being adjusted to control inflation.
452
- Foreign direct investment increased by 12% this quarter."""
453
-
454
- PLACEHOLDER_D2 = """\
455
- Arctic ice sheets are melting faster than predicted.
456
- Scientists warn of irreversible tipping points.
457
- Carbon capture technology is advancing but not fast enough."""
458
-
459
- # ── Explainer content ─────────────────────────────────────────────────────────
460
- EXPLAINER_HOW = """
461
- ### How does this tool work?
462
-
463
- Every sentence carries meaning. This tool uses an AI language model to translate
464
- each sentence into a **point in meaning-space** β€” an invisible map where sentences
465
- that mean similar things sit close together, and sentences with very different
466
- meanings sit far apart.
467
-
468
- You define **two poles** by giving example sentences for each β€” for instance,
469
- *economic growth* vs *climate crisis*. These poles create a spectrum.
470
-
471
- Then you enter two sets of text (the "discourses") and the tool measures
472
- where each one sits on that spectrum. The results tell you:
473
-
474
- - **Which pole each text is closer to** (and by how much)
475
- - **How spread out** each set of sentences is (focused vs wide-ranging)
476
- - **What direction** the sentences vary in (along the spectrum, or off to the side)
477
-
478
- The 3D map lets you **see** the results β€” each dot is a sentence, and you can
479
- rotate and zoom to explore how they cluster.
480
- """
481
-
482
  # ── CSS ───────────────────────────────────────────────────────────────────────
483
  CSS = """
484
  body, .gradio-container { background: #0d0f1c !important; }
@@ -496,133 +431,126 @@ label span { color: #8892bb !important;
496
  border: none !important;
497
  font-weight: 800 !important;
498
  font-size: 1.05rem !important;
499
- letter-spacing: 0.03em !important;
500
  border-radius: 10px !important; }
501
  .run-btn:hover { opacity: 0.86 !important; }
502
  .output-text textarea { font-family: 'Courier New', monospace !important;
503
  font-size: 0.79rem !important;
504
  color: #7dd8f8 !important;
505
- line-height: 1.55 !important; }
506
  h1, h2, h3, h4 { color: #dde4f8 !important; }
507
- .gr-accordion { border: 1px solid #1c2040 !important;
508
- border-radius: 10px !important; }
509
- .name-box input { font-weight: 700 !important;
510
- font-size: 0.95rem !important; }
511
  """
512
 
 
 
 
 
 
 
513
 
514
  # ── UI ────────────────────────────────────────────────────────────────────────
515
  with gr.Blocks(css=CSS, title="Discourse Compass") as demo:
516
 
517
- # ── Header ────────────────────────────────────────────────────────────
518
  gr.HTML("""
519
- <div style="padding: 8px 0 20px 0;">
520
- <h1 style="color:#dde4f8; font-size:2rem; font-weight:900;
521
- margin-bottom:6px; letter-spacing:-0.5px;">
522
  🧭 Discourse Compass
523
  </h1>
524
- <p style="color:#5a6488; font-size:0.92rem; margin:0; max-width:700px;">
525
- Define two semantic poles with example sentences, then find out where
526
- any text sits between them β€” with plain-language explanations.
527
  </p>
528
  </div>""")
529
 
530
- with gr.Accordion("πŸ’‘ How does this work? (click to read)", open=False):
531
- gr.Markdown(EXPLAINER_HOW)
532
-
533
- gr.HTML("<hr style='border-color:#1c2040; margin: 8px 0 20px 0;'>")
534
-
535
- # ── Step 1: Poles ─────────────────────────────────────────────────────
536
- gr.HTML("""
537
- <h3 style="color:#dde4f8; margin-bottom:4px;">Step 1 β€” Define your two poles</h3>
538
- <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
539
- Enter several sentences that represent each extreme. One sentence per line.
540
- </p>""")
541
 
542
  with gr.Row():
 
543
  with gr.Column():
544
- gr.HTML("<span style='color:#5aa8ff;font-weight:700;'>πŸ”΅ POLE A</span>")
545
- name_a_box = gr.Textbox(label="Name for Pole A",
546
- value="Economic Growth",
547
- elem_classes=["name-box"])
548
- pole_a = gr.Textbox(label="Sentences β€” one per line",
549
- lines=7, value=PLACEHOLDER_A)
 
 
 
 
 
 
 
550
  with gr.Column():
551
- gr.HTML("<span style='color:#ff6b6b;font-weight:700;'>πŸ”΄ POLE B</span>")
552
- name_b_box = gr.Textbox(label="Name for Pole B",
553
- value="Climate Crisis",
554
- elem_classes=["name-box"])
555
- pole_b = gr.Textbox(label="Sentences β€” one per line",
556
- lines=7, value=PLACEHOLDER_B)
557
-
558
- gr.HTML("<hr style='border-color:#1c2040; margin: 20px 0;'>")
559
-
560
- # ── Step 2: Discourses ────────────────────────────────────────────────
561
- gr.HTML("""
562
- <h3 style="color:#dde4f8; margin-bottom:4px;">Step 2 β€” Enter the texts to analyse</h3>
563
- <p style="color:#5a6488; font-size:0.86rem; margin:0 0 14px 0;">
564
- These are the texts whose position between the poles you want to measure.
565
- </p>""")
566
 
567
  with gr.Row():
 
568
  with gr.Column():
569
- gr.HTML("<span style='color:#3dd6a3;font-weight:700;'>🟒 TEXT 1</span>")
570
- name_d1_box = gr.Textbox(label="Name for Text 1",
571
- value="Financial News",
572
- elem_classes=["name-box"])
573
- disc1 = gr.Textbox(label="Sentences β€” one per line",
574
- lines=5, value=PLACEHOLDER_D1)
 
 
 
 
 
 
 
575
  with gr.Column():
576
- gr.HTML("<span style='color:#ffcc55;font-weight:700;'>🟑 TEXT 2</span>")
577
- name_d2_box = gr.Textbox(label="Name for Text 2",
578
- value="Climate Reporting",
579
- elem_classes=["name-box"])
580
- disc2 = gr.Textbox(label="Sentences β€” one per line",
581
- lines=5, value=PLACEHOLDER_D2)
582
-
583
- # ── Run button ────────────────────────────────────────────────────────
584
- gr.HTML("<div style='margin: 24px 0 8px 0;'>")
585
- run_btn = gr.Button("⚑ Run Analysis", variant="primary",
586
- size="lg", elem_classes=["run-btn"])
587
- gr.HTML("</div>")
588
-
589
- gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")
590
-
591
- # ── Results ───────────────────────────────────────────────────────────
592
- gr.HTML("""
593
- <h3 style="color:#dde4f8; margin: 0 0 4px 0;">πŸ“Š Interactive Semantic Map</h3>
594
- <p style="color:#5a6488; font-size:0.84rem; margin:0 0 12px 0;">
595
- Each dot is a sentence. Diamonds (β—†) mark the centre of each group.
596
- <strong>Drag to rotate Β· scroll to zoom Β· click legend items to toggle.</strong>
597
- </p>""")
598
-
599
- plot_out = gr.Plot(label="Semantic Map")
600
-
601
- gr.HTML("<hr style='border-color:#1c2040; margin: 24px 0 16px 0;'>")
602
-
603
- gr.HTML("""
604
- <h3 style="color:#dde4f8; margin: 0 0 4px 0;">πŸ“‹ Results Report</h3>
605
- <p style="color:#5a6488; font-size:0.84rem; margin:0 0 10px 0;">
606
- Plain-language summary of every measurement.
607
- </p>""")
608
-
609
- text_out = gr.Textbox(label="Results", lines=42, interactive=False,
610
- elem_classes=["output-text"])
611
 
612
- # ── Wire up events ────────────────────────────────────────────────────
613
  run_btn.click(
614
  fn=run_analysis,
615
- inputs=[pole_a, pole_b, disc1, disc2,
616
- name_a_box, name_b_box, name_d1_box, name_d2_box],
617
- outputs=[text_out, plot_out],
 
 
 
 
618
  )
619
 
620
- gr.HTML(f"""
621
- <p style="color:#1e2440; font-size:0.74rem; text-align:center;
622
- margin-top:28px; padding-bottom:12px;">
623
- All measurements use the full {MODEL_DIM}-dimensional meaning space of
624
- <code>{MODEL_NAME}</code>.
625
- The 3D map is a simplified view (PCA) for orientation only.
626
  </p>""")
627
 
628
  if __name__ == "__main__":
 
1
  """
2
+ Discourse Compass — Gradio App
3
  =============================================================
4
+ Spatial-geometric discourse analysis for corpus analysts.
5
+ Plain-language results focused on position, cluster tightness,
6
+ and comparative interpretation.
 
7
  """
8
 
9
  import gradio as gr
 
11
  import plotly.graph_objects as go
12
  from sentence_transformers import SentenceTransformer
13
  from sklearn.decomposition import PCA
 
14
 
15
  # ── Model ─────────────────────────────────────────────────────────────────────
16
  MODEL_NAME = "all-mpnet-base-v2"
 
17
  _model = None
18
 
19
  def get_model():
 
22
  _model = SentenceTransformer(MODEL_NAME)
23
  return _model
24
 
25
+ # ── Helpers ───────────────────────────────────────────────────────────────────
26
  def parse_sentences(text):
27
  return [s.strip() for s in text.strip().splitlines() if s.strip()]
28
 
 
34
  c = abs(float(np.dot(unit(u), unit(v))))
35
  return float(np.degrees(np.arccos(min(c, 1.0))))
36
 
37
def frobenius_spread(vecs):
    """Return the total spread of a point cloud as a plain float.

    The column-wise mean is subtracted from every row and the Frobenius norm
    of the resulting centred matrix is returned.

    NOTE(review): assumes *vecs* is a 2-D NumPy array with one point per row
    (the "fro" norm is only defined for matrices) -- confirm against callers.
    """
    centred = vecs - vecs.mean(axis=0)
    return float(np.linalg.norm(centred, ord="fro"))
40
 
41
def pc1_axis_angle(vecs, axis):
    """Angle (degrees) between the first principal component of *vecs* and *axis*.

    The first principal component is the eigenvector of the sample covariance
    (columns as variables) with the largest eigenvalue. The angle itself is
    delegated to ``angle_between``.

    Returns 90.0 when no principal direction exists:
      * fewer than two rows, or
      * a cloud with (numerically) zero variance, e.g. identical rows. In that
        case the covariance is all zeros and ``eigh`` would hand back an
        arbitrary basis vector, so any computed angle would be meaningless.

    NOTE(review): assumes *vecs* is a 2-D array with at least two columns and
    *axis* is a vector of matching length -- confirm against callers.
    """
    if vecs.shape[0] < 2:
        return 90.0
    cov = np.cov(vecs, rowvar=False)
    vals, evecs = np.linalg.eigh(cov)
    top = int(np.argmax(vals))
    # Same tolerance the file uses elsewhere to treat an eigenvalue as zero.
    if vals[top] <= 1e-12:
        return 90.0
    return angle_between(evecs[:, top], axis)
49
+
50
def isotropy(vecs):
    """λ_min / λ_max of the covariance spectrum -- how spherical the cloud is.

    Eigenvalues of the sample covariance (columns as variables) that do not
    exceed 1e-12 are discarded before the ratio is taken. Returns 0.0 when
    there are fewer than two rows or fewer than two eigenvalues survive the
    cut (0 = line, 1 = sphere).
    """
    if vecs.shape[0] < 2:
        return 0.0
    spectrum = np.linalg.eigvalsh(np.cov(vecs, rowvar=False))
    significant = spectrum[spectrum > 1e-12]
    if len(significant) < 2:
        return 0.0
    return float(significant.min() / significant.max())
60
+
61
+ # ── Pole separation label ─────────────────────────────────────────────────────
62
def pole_sep_label(sep):
    """Classify a pole-separation score.

    Returns a ``(word, note)`` tuple: a one-word strength rating and a
    sentence explaining how far the results can be trusted. Thresholds are
    inclusive lower bounds: >= 0.5 strong, >= 0.3 moderate, >= 0.15 weak,
    anything lower very weak.
    """
    # Em dashes restored: the literals previously carried mis-decoded bytes.
    if sep >= 0.5:
        return "strong", "The axis cleanly separates the two poles — results are reliable."
    elif sep >= 0.3:
        return "moderate", "The axis separates the poles reasonably well — results are meaningful."
    elif sep >= 0.15:
        return "weak", "The poles are only weakly separated — interpret results with caution."
    else:
        return "very weak", "The poles are barely distinguishable — axis may not be valid."
71
+
72
+ # ── Position percentage helper ────────────────────────────────────────────────
73
def position_pct(score, neg_mean, pos_mean):
    """Express *score* as a percentage of the way from *neg_mean* to *pos_mean*.

    The result is clipped to the closed range 0-100. When the two reference
    values are closer together than 1e-9 the span is treated as degenerate and
    the midpoint, 50.0, is returned.
    """
    span = pos_mean - neg_mean
    if abs(span) < 1e-9:
        return 50.0
    fraction = (score - neg_mean) / span
    return float(np.clip(fraction * 100, 0, 100))
79
+
80
+ # ── Bar renderer ──────────────────────────────────────────────────────────────
81
def render_bar(pct, label, width=44):
    """Render a one-marker text bar for a percentage, followed by a caption line.

    Args:
        pct: Position in percent. The marker position is clamped to 0-100 so
            the bar always has ``width + 1`` cells; the number printed next to
            the bar is the unclamped value.
        label: Caption printed on the second line after an arrow.
        width: Number of track cells on the bar (the marker adds one more).

    Returns:
        A two-line string: the bar with its percentage, then "→ label".
    """
    # Clamp only the marker position: a negative or >100 value used to shrink
    # or stretch the bar because str * negative-int silently yields "".
    clamped = min(max(pct, 0), 100)
    pos = int(round(clamped / 100 * width))
    bar = "░" * pos + "●" + "░" * (width - pos)
    return f" {bar} ({pct:.0f}%)\n → {label}"
85
+
86
+ # ── Spread label ──────────────────────────────────────────────────────────────
87
def spread_label(spread, is_pole=False):
    """Return a short plain-language description of a spread score.

    Pole corpora (``is_pole=True``) use a single cut at 2.0. Other texts are
    bucketed by the upper bounds 1.0, 1.8 and 2.5 (exclusive); anything at or
    above 2.5 is "wide-ranging".
    """
    if is_pole:
        if spread > 2.0:
            return "wide-ranging (as expected for a pole corpus)"
        return "fairly focused for a pole corpus"

    buckets = (
        (1.0, "very tightly focused"),
        (1.8, "tightly focused"),
        (2.5, "moderately varied"),
    )
    for upper_bound, description in buckets:
        if spread < upper_bound:
            return description
    return "wide-ranging"
102
+
103
+ # ── Reliability label from spread ────────────────────────────────────────────
104
def reliability_label(spread):
    """Return a plain-language reliability statement for a spread score.

    Lower spread reads as more reliable. Buckets use exclusive upper bounds
    of 1.0, 1.8 and 2.5; anything at or above 2.5 is "less reliable".
    """
    # Em dashes restored: the literals previously carried mis-decoded bytes.
    if spread < 1.0:
        return "very reliable — sentences are highly consistent"
    elif spread < 1.8:
        return "reliable — sentences cluster closely together"
    elif spread < 2.5:
        return "moderately reliable — some internal variation"
    else:
        return "less reliable — sentences pull in quite different directions"
113
+
114
+ # ── Axis relevance label ──────────────────────────────────────────────────────
115
def axis_relevance_label(angle):
    """Rate how much of a text's variation runs along the pole axis.

    Takes an angle and returns a ``(word, description)`` tuple: below 30 is
    "high", below 60 is "moderate", everything else is "low".
    """
    if angle < 30:
        rating = "high"
        detail = "sentences mostly differ by being more or less aligned with the poles"
    elif angle < 60:
        rating = "moderate"
        detail = "sentences differ partly along the pole axis, partly on other dimensions"
    else:
        rating = "low"
        detail = "sentences differ mainly on dimensions unrelated to this axis"
    return rating, detail
123
+
124
+ # ── Gap interpretation ────────────────────────────────────────────────────────
125
def gap_label(gap_pct):
    """Describe the size of a positional gap given in percentage points.

    Thresholds are inclusive lower bounds: >= 40 very large, >= 25
    substantial, >= 12 moderate, >= 5 small, otherwise negligible.
    """
    # Em dashes restored: the literals previously carried mis-decoded bytes.
    if gap_pct >= 40:
        return "very large — a clear, unmistakeable difference"
    elif gap_pct >= 25:
        return "substantial — a meaningful difference"
    elif gap_pct >= 12:
        return "moderate — a noticeable but not dramatic difference"
    elif gap_pct >= 5:
        return "small — the texts are fairly similar in position"
    else:
        return "negligible — no clear difference in position"
136
+
137
+ # ── Main analysis function ────────────────────────────────────────────────────
138
+ def run_analysis(
139
+ pole_neg_name, pole_neg_text,
140
+ pole_pos_name, pole_pos_text,
141
+ text1_name, text1_text,
142
+ text2_name, text2_text,
143
+ ):
144
+ # ── Parse inputs ──────────────────────────────────────────────────────
145
+ pole_neg_sents = parse_sentences(pole_neg_text)
146
+ pole_pos_sents = parse_sentences(pole_pos_text)
147
+ text1_sents = parse_sentences(text1_text)
148
+ text2_sents = parse_sentences(text2_text)
149
 
150
+ errors = []
151
+ if len(pole_neg_sents) < 3:
152
+ errors.append(f"'{pole_neg_name}' pole needs at least 3 sentences.")
153
+ if len(pole_pos_sents) < 3:
154
+ errors.append(f"'{pole_pos_name}' pole needs at least 3 sentences.")
155
+ if len(text1_sents) < 1:
156
+ errors.append(f"'{text1_name}' needs at least 1 sentence.")
157
+ if len(text2_sents) < 1:
158
+ errors.append(f"'{text2_name}' needs at least 1 sentence.")
159
+ if errors:
160
+ return "\n".join(errors), None
161
 
162
+ # ── Embed ─────────────────────────────────────────────────────────────
163
+ model = get_model()
164
+ all_sents = pole_neg_sents + pole_pos_sents + text1_sents + text2_sents
165
+ all_vecs = model.encode(all_sents, normalize_embeddings=True,
166
+ show_progress_bar=False)
167
+
168
+ n_neg = len(pole_neg_sents)
169
+ n_pos = len(pole_pos_sents)
170
+ n_t1 = len(text1_sents)
171
+
172
+ vecs_neg = all_vecs[:n_neg]
173
+ vecs_pos = all_vecs[n_neg:n_neg+n_pos]
174
+ vecs_t1 = all_vecs[n_neg+n_pos:n_neg+n_pos+n_t1]
175
+ vecs_t2 = all_vecs[n_neg+n_pos+n_t1:]
176
+
177
+ # ── Axis construction ─────────────────────────────────────────────────
178
+ c_neg = vecs_neg.mean(axis=0)
179
+ c_pos = vecs_pos.mean(axis=0)
180
+ axis = unit(c_pos - c_neg)
181
+
182
+ pole_sep = float(np.dot(c_pos, axis) - np.dot(c_neg, axis))
183
+ sep_word, sep_note = pole_sep_label(pole_sep)
184
+
185
+ # ── Projections ───────────────────────────────────────────────────────
186
+ proj_neg = float(np.dot(c_neg, axis))
187
+ proj_pos = float(np.dot(c_pos, axis))
188
+ proj_t1 = float(np.dot(vecs_t1.mean(axis=0), axis))
189
+ proj_t2 = float(np.dot(vecs_t2.mean(axis=0), axis))
190
+
191
+ pct_neg = position_pct(proj_neg, proj_neg, proj_pos) # 0%
192
+ pct_pos = position_pct(proj_pos, proj_neg, proj_pos) # 100%
193
+ pct_t1 = position_pct(proj_t1, proj_neg, proj_pos)
194
+ pct_t2 = position_pct(proj_t2, proj_neg, proj_pos)
195
+
196
+ gap_pct = abs(pct_t1 - pct_t2)
197
+
198
+ # ── Position labels ───────────────────────────────────────────────────
199
def position_desc(pct, pn, pp):
    """Turn a spectrum percentage into a plain-language position phrase.

    Args:
        pct: Position on the spectrum as a percentage; lower values lean
            towards the ``pn`` pole, higher values towards the ``pp`` pole.
        pn: Display name of the pole at the low end of the scale.
        pp: Display name of the pole at the high end of the scale.

    Returns:
        A phrase such as ``"closer to the <name> pole"``.
    """
    # Each band is (inclusive upper bound, wording, pole it leans towards).
    # Bands are scanned in ascending order, so the first match wins.
    bands = (
        (15, "very close to", pn),
        (35, "closer to", pn),
        (50, "slightly closer to", pn),
        (65, "slightly closer to", pp),
        (85, "closer to", pp),
    )
    for upper, wording, pole in bands:
        if pct <= upper:
            return f"{wording} the {pole} pole"
    # Everything above 85 lands here, as does any value for which every
    # comparison is false (e.g. NaN).
    return f"very close to the {pp} pole"
212
+
213
+ desc_t1 = position_desc(pct_t1, pole_neg_name, pole_pos_name)
214
+ desc_t2 = position_desc(pct_t2, pole_neg_name, pole_pos_name)
215
+
216
+ # ── Spread ────────────────────────────────────────────────────────────
217
+ spread_neg = frobenius_spread(vecs_neg)
218
+ spread_pos = frobenius_spread(vecs_pos)
219
+ spread_t1 = frobenius_spread(vecs_t1)
220
+ spread_t2 = frobenius_spread(vecs_t2)
221
+
222
+ rel_t1 = reliability_label(spread_t1)
223
+ rel_t2 = reliability_label(spread_t2)
224
+
225
+ # ── Axis relevance ────────────────────────────────────────────────────
226
+ angle_t1 = pc1_axis_angle(vecs_t1, axis)
227
+ angle_t2 = pc1_axis_angle(vecs_t2, axis)
228
+ ar_word_t1, ar_desc_t1 = axis_relevance_label(angle_t1)
229
+ ar_word_t2, ar_desc_t2 = axis_relevance_label(angle_t2)
230
+
231
+ # ── Verdict ───────────────────────────────────────────────────────────
232
+ gap_desc = gap_label(gap_pct)
233
+
234
+ if gap_pct < 5:
235
+ verdict = (f"No clear difference: {text1_name} and {text2_name} sit "
236
+ f"in very similar positions on the {pole_neg_name}↔{pole_pos_name} spectrum.")
237
+ else:
238
+ closer_neg = text1_name if pct_t1 < pct_t2 else text2_name
239
+ closer_pos = text2_name if pct_t1 < pct_t2 else text1_name
240
+ verdict = (f"{closer_neg} aligns more closely with {pole_neg_name}; "
241
+ f"{closer_pos} aligns more closely with {pole_pos_name}. "
242
+ f"The gap between them is {gap_desc}.")
243
+
244
+ # Reliability caveat
245
+ caveats = []
246
+ if spread_t1 > 2.5:
247
+ caveats.append(f"{text1_name} is wide-ranging β€” its position score is an average of quite different sentences.")
248
+ if spread_t2 > 2.5:
249
+ caveats.append(f"{text2_name} is wide-ranging β€” its position score is an average of quite different sentences.")
250
+ if sep_word in ("weak", "very weak"):
251
+ caveats.append(f"The axis itself has {sep_word} pole separation β€” treat all results with caution.")
252
+
253
+ # ── Report ────────────────────────────────────────────────────────────
254
+ W = 62
255
+ SEP = "═" * W
256
+
257
+ lines = [
258
+ SEP,
259
+ " DISCOURSE COMPASS β€” Results",
260
+ SEP,
261
+ "",
262
+ f" AXIS: {pole_neg_name} ←{'─'*20}β†’ {pole_pos_name}",
263
+ f" Pole separation: {sep_word} ({pole_sep:.2f}) β€” {sep_note}",
264
+ "",
265
+ "─" * W,
266
+ " WHERE EACH TEXT SITS ON THE SPECTRUM",
267
+ "─" * W,
268
+ f" Reading: 0% = {pole_neg_name} pole | 100% = {pole_pos_name} pole",
269
+ "",
270
+ f" {pole_neg_name} pole {'β–‘'*21}●{'β–‘'*21} (0%)",
271
+ "",
272
+ f" {text1_name}:",
273
+ render_bar(pct_t1, desc_t1),
274
+ "",
275
+ f" {text2_name}:",
276
+ render_bar(pct_t2, desc_t2),
277
+ "",
278
+ f" {pole_pos_name} pole {'β–‘'*21}●{'β–‘'*21} (100%)",
279
+ "",
280
+ f" Gap between texts: {gap_pct:.0f} percentage points β€” {gap_desc}.",
281
+ "",
282
+ "─" * W,
283
+ " HOW CONSISTENTLY DO THE SENTENCES CLUSTER?",
284
+ "─" * W,
285
+ " A tight cluster means all sentences point in the same direction.",
286
+ " A loose cluster means they pull in different directions β€” the",
287
+ " position score becomes less reliable as an overall summary.",
288
+ "",
289
+ f" {pole_neg_name} pole spread = {spread_neg:.2f} β€” {spread_label(spread_neg, is_pole=True)}",
290
+ f" {pole_pos_name} pole spread = {spread_pos:.2f} β€” {spread_label(spread_pos, is_pole=True)}",
291
+ f" {text1_name:<22} spread = {spread_t1:.2f} β€” {spread_label(spread_t1)}",
292
+ f" Position score is {rel_t1}.",
293
+ f" {text2_name:<22} spread = {spread_t2:.2f} β€” {spread_label(spread_t2)}",
294
+ f" Position score is {rel_t2}.",
295
+ "",
296
+ "─" * W,
297
+ " HOW AXIS-RELEVANT IS THE VARIATION?",
298
+ "─" * W,
299
+ " This checks whether the sentences within each text differ from",
300
+ " each other mainly along the pole axis, or mainly on unrelated",
301
+ " dimensions (topic, register, tone, etc.).",
302
+ "",
303
+ f" {text1_name}: axis relevance is {ar_word_t1}",
304
+ f" β†’ {ar_desc_t1}.",
305
+ f" {text2_name}: axis relevance is {ar_word_t2}",
306
+ f" β†’ {ar_desc_t2}.",
307
+ "",
308
+ ]
309
 
310
+ if caveats:
311
+ lines += ["─" * W, " ⚠ CAVEATS", "─" * W]
312
+ for c in caveats:
313
+ lines.append(f" β€’ {c}")
314
+ lines.append("")
315
+
316
+ lines += [
317
+ "─" * W,
318
+ " SUMMARY",
319
+ "─" * W,
320
+ f" {verdict}",
321
+ "",
322
+ ]
323
 
324
+ if caveats:
325
+ lines.append(" ⚠ See caveats above before drawing strong conclusions.")
326
+ else:
327
+ lines.append(" Results appear reliable. No major caveats.")
328
+
329
+ lines += [
330
+ "",
331
+ SEP,
332
+ " Measurements use the full 768-dimensional meaning space of",
333
+ f" {MODEL_NAME}. The 3D map is a simplified view.",
334
+ SEP,
335
+ ]
336
 
337
+ report = "\n".join(lines)
338
+
339
+ # ── 3D Plot ───────────────────────────────────────────────────────────
340
+ pca = PCA(n_components=3)
341
+ all_study = np.vstack([vecs_neg, vecs_pos, vecs_t1, vecs_t2])
342
+ coords = pca.fit_transform(all_study)
343
+
344
+ i0 = 0
345
+ i1 = n_neg
346
+ i2 = n_neg + n_pos
347
+ i3 = n_neg + n_pos + n_t1
348
+
349
def make_trace(coords_slice, name, color, symbol, size=6):
    """Build one 3D marker-only scatter trace for a group of points.

    Args:
        coords_slice: 2-D array whose first three columns are used as the
            x, y and z coordinates of the points.
        name: Trace name passed to Plotly.
        color: Marker colour.
        symbol: Marker symbol name.
        size: Marker size (defaults to 6).

    Returns:
        A ``go.Scatter3d`` trace drawn with slightly translucent markers.
    """
    marker_style = dict(size=size, color=color, symbol=symbol, opacity=0.82)
    return go.Scatter3d(
        x=coords_slice[:, 0],
        y=coords_slice[:, 1],
        z=coords_slice[:, 2],
        mode="markers",
        name=name,
        marker=marker_style,
    )
357
+
358
+ # Pole centroid markers (larger stars)
359
+ c_neg_3d = coords[:n_neg].mean(axis=0)
360
+ c_pos_3d = coords[n_neg:n_neg+n_pos].mean(axis=0)
361
+
362
+ traces = [
363
+ make_trace(coords[i0:i1], f"{pole_neg_name} (pole)", "#e05555", "circle"),
364
+ make_trace(coords[i1:i2], f"{pole_pos_name} (pole)", "#4a9eff", "circle"),
365
+ make_trace(coords[i2:i3], text1_name, "#f5a623", "diamond", size=8),
366
+ make_trace(coords[i3:], text2_name, "#7ed321", "square", size=8),
367
+ go.Scatter3d(
368
+ x=[c_neg_3d[0]], y=[c_neg_3d[1]], z=[c_neg_3d[2]],
 
369
  mode="markers+text",
370
+ name=f"{pole_neg_name} centroid",
371
+ text=[pole_neg_name],
 
372
  textposition="top center",
373
+ marker=dict(size=12, color="#e05555", symbol="cross"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  showlegend=False,
375
+ ),
376
+ go.Scatter3d(
377
+ x=[c_pos_3d[0]], y=[c_pos_3d[1]], z=[c_pos_3d[2]],
378
+ mode="markers+text",
379
+ name=f"{pole_pos_name} centroid",
380
+ text=[pole_pos_name],
381
+ textposition="top center",
382
+ marker=dict(size=12, color="#4a9eff", symbol="cross"),
383
  showlegend=False,
384
+ ),
385
+ ]
 
 
 
 
 
 
 
 
 
386
 
387
+ fig = go.Figure(data=traces)
388
  fig.update_layout(
 
 
 
 
 
 
 
 
389
  title=dict(
390
+ text=f"Sentence Clusters: {pole_neg_name} ↔ {pole_pos_name}",
391
+ font=dict(color="#dde4f8", size=14),
 
 
 
 
 
392
  ),
393
+ scene=dict(
394
+ xaxis=dict(title=f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)",
395
+ backgroundcolor="#0d0f1c", gridcolor="#1c2040",
396
+ color="#8892bb"),
397
+ yaxis=dict(title=f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)",
398
+ backgroundcolor="#0d0f1c", gridcolor="#1c2040",
399
+ color="#8892bb"),
400
+ zaxis=dict(title=f"PC3 ({pca.explained_variance_ratio_[2]*100:.1f}%)",
401
+ backgroundcolor="#0d0f1c", gridcolor="#1c2040",
402
+ color="#8892bb"),
403
+ bgcolor="#0d0f1c",
404
  ),
405
+ paper_bgcolor="#0d0f1c",
406
+ plot_bgcolor="#0d0f1c",
407
+ font=dict(color="#dde4f8"),
408
+ legend=dict(bgcolor="#13162a", bordercolor="#1c2040",
409
+ font=dict(color="#dde4f8")),
410
+ margin=dict(l=0, r=0, t=40, b=0),
411
+ height=520,
412
  )
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  return report, fig
415
 
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  # ── CSS ───────────────────────────────────────────────────────────────────────
418
  CSS = """
419
  body, .gradio-container { background: #0d0f1c !important; }
 
431
  border: none !important;
432
  font-weight: 800 !important;
433
  font-size: 1.05rem !important;
 
434
  border-radius: 10px !important; }
435
  .run-btn:hover { opacity: 0.86 !important; }
436
  .output-text textarea { font-family: 'Courier New', monospace !important;
437
  font-size: 0.79rem !important;
438
  color: #7dd8f8 !important;
439
+ line-height: 1.6 !important; }
440
  h1, h2, h3, h4 { color: #dde4f8 !important; }
 
 
 
 
441
  """
442
 
443
# Markdown blurb rendered under the page header.
INTRO = """
**Discourse Compass** positions any text on a spectrum between two semantic poles you define.

Enter example sentences for each pole, then enter the texts you want to compare.
Each sentence goes on its own line.
"""

# ── UI ────────────────────────────────────────────────────────────────────────
# Page layout, top to bottom:
#   1. HTML header + Markdown intro
#   2. Row with the two pole definitions (name + example sentences each)
#   3. Row with the two texts to compare (name + sentences each)
#   4. Run button, then the 3D plot and the plain-text report
# The decorative glyphs in the headings and button (◀ ▶ ◆ — ·) are written as
# real Unicode characters; they had been corrupted into mojibake sequences.
with gr.Blocks(css=CSS, title="Discourse Compass") as demo:

    gr.HTML("""
    <div style="padding:8px 0 16px 0;">
      <h1 style="color:#dde4f8;font-size:2rem;font-weight:900;
                 margin-bottom:4px;letter-spacing:-0.5px;">
        🧭 Discourse Compass
      </h1>
      <p style="color:#5a6488;font-size:0.9rem;margin:0;">
        Position any text on a spectrum between two semantic poles —
        plain-language results for corpus analysts.
      </p>
    </div>""")

    gr.Markdown(INTRO)

    with gr.Row():
        # ── Pole A ────────────────────────────────────────────────────────
        with gr.Column():
            gr.HTML("<h3 style='color:#e05555;margin-bottom:4px;'>◀ Pole A</h3>")
            pole_neg_name = gr.Textbox(
                value="growth critical",
                label="Name for Pole A",
                placeholder="e.g. ecocentric, conservative, pro-regulation …",
            )
            pole_neg_text = gr.Textbox(
                label="Example sentences for Pole A (one per line, min. 3)",
                lines=8,
                placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
            )

        # ── Pole B ────────────────────────────────────────────────────────
        with gr.Column():
            gr.HTML("<h3 style='color:#4a9eff;margin-bottom:4px;'>▶ Pole B</h3>")
            pole_pos_name = gr.Textbox(
                value="growth favoured",
                label="Name for Pole B",
                placeholder="e.g. anthropocentric, progressive, pro-market …",
            )
            pole_pos_text = gr.Textbox(
                label="Example sentences for Pole B (one per line, min. 3)",
                lines=8,
                placeholder="Paste 10–15 representative sentences here.\nOne sentence per line.",
            )

    gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")

    with gr.Row():
        # ── Text 1 ────────────────────────────────────────────────────────
        with gr.Column():
            gr.HTML("<h3 style='color:#f5a623;margin-bottom:4px;'>◆ Text A</h3>")
            text1_name = gr.Textbox(
                value="Text A",
                label="Name for Text A",
                placeholder="e.g. Financial News, Corpus 1, Policy Document …",
            )
            text1_text = gr.Textbox(
                label="Sentences from Text A (one per line)",
                lines=6,
                placeholder="Paste sentences here.\nOne sentence per line.",
            )

        # ── Text 2 ────────────────────────────────────────────────────────
        with gr.Column():
            gr.HTML("<h3 style='color:#7ed321;margin-bottom:4px;'>◆ Text B</h3>")
            text2_name = gr.Textbox(
                value="Text B",
                label="Name for Text B",
                placeholder="e.g. Climate Reporting, Corpus 2, Interview Data …",
            )
            text2_text = gr.Textbox(
                label="Sentences from Text B (one per line)",
                lines=6,
                placeholder="Paste sentences here.\nOne sentence per line.",
            )

    run_btn = gr.Button("▶ Run Analysis", elem_classes=["run-btn"])

    gr.HTML("<hr style='border-color:#1c2040;margin:8px 0;'>")

    plot_out = gr.Plot(label="3D Sentence Map (rotate & zoom)")
    report_out = gr.Textbox(
        label="Results",
        lines=40,
        interactive=False,
        elem_classes=["output-text"],
    )

    # NOTE(review): the input order presumably mirrors run_analysis's
    # parameter order (name, then sentences, for each pole and each text);
    # confirm against its signature. The outputs are listed report first,
    # plot second.
    run_btn.click(
        fn=run_analysis,
        inputs=[
            pole_neg_name, pole_neg_text,
            pole_pos_name, pole_pos_text,
            text1_name, text1_text,
            text2_name, text2_text,
        ],
        outputs=[report_out, plot_out],
    )

    gr.HTML("""
    <p style="color:#2a2e4a;font-size:0.73rem;text-align:center;
              margin-top:20px;padding-bottom:10px;">
      Embeddings: all-mpnet-base-v2 (768-dim) · H4rmony Project
    </p>""")
555
 
556
  if __name__ == "__main__":