Spaces:

neovalle
/

SpatialDiscourseAnalysis

Sleeping

App Files Files Community

neovalle committed on Apr 20

Commit

9d334e5

verified ·

1 Parent(s): 2ce0748

Upload app.py

Browse files

Files changed (1) hide show

app.py +113 -78

app.py CHANGED Viewed

@@ -347,108 +347,135 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
     # ── Build plain-language report ───────────────────────────────────────
-    # Pole separation quality
     pole_cos = float(cosine(heart_a, heart_b))
     if pole_cos > 0.4:
-        sep_word = "strong"
-        sep_note = "The two poles are clearly distinct — results are reliable."
     elif pole_cos > 0.2:
-        sep_word = "moderate"
-        sep_note = "The poles are reasonably distinct — results are meaningful."
     else:
-        sep_word = "weak"
-        sep_note = "The poles are quite similar — consider using more contrasting sentences."
-    # Position bar (pole A = left anchor, pole B = right anchor)
     def position_bar(pct, width=40):
         """Render a one-line text gauge with a ● marker placed at *pct*.

         *pct* is clamped to the range 0..1 and scaled to *width* cells. The
         marker is preceded by that many ░ cells (rounded) and followed by
         the remaining ones, so for a non-negative *width* the result is
         ``width + 1`` characters long.
         """
         clamped = max(0, min(1, pct))
         left = int(round(clamped * width))
         right = width - left
         return f"{'░' * left}●{'░' * right}"
-    # Plain position description
     def position_desc(pct, na, nb):
         if pct <= 0.10:
-            return f"very close to the {na} pole"
         elif pct <= 0.30:
-            return f"closer to {na}"
         elif pct <= 0.45:
-            return f"slightly leaning toward {na}"
         elif pct <= 0.55:
-            return f"roughly midway between {na} and {nb}"
         elif pct <= 0.70:
-            return f"slightly leaning toward {nb}"
         elif pct <= 0.90:
-            return f"closer to {nb}"
         else:
-            return f"very close to the {nb} pole"
     desc_d1 = position_desc(pct_d1, name_a, name_b)
     desc_d2 = position_desc(pct_d2, name_a, name_b)
-    # Gap between texts
     gap = abs(pct_d1 - pct_d2)
     if gap < 0.05:
-        gap_desc = "no meaningful difference in position"
     elif gap < 0.15:
-        gap_desc = "a small difference in position"
     elif gap < 0.30:
-        gap_desc = "a moderate difference in position"
     elif gap < 0.50:
-        gap_desc = "a substantial difference in position"
     else:
-        gap_desc = "a very large difference in position"
-    # Cluster tightness as reliability
-    def reliability_label(spread, all_spreads):
         mn, mx = min(all_spreads), max(all_spreads)
         r = (spread - mn) / (mx - mn) if mx > mn else 0.5
         if r < 0.25:
-            return "very consistent — position score is highly reliable"
         elif r < 0.50:
-            return "fairly consistent — position score is reliable"
         elif r < 0.75:
-            return "somewhat varied — position score is an average across different angles"
         else:
-            return "wide-ranging — position score averages over quite different sentences"
-    rel_d1 = reliability_label(bread_d1, all_breads)
-    rel_d2 = reliability_label(bread_d2, all_breads)
-    # Axis relevance (brief caveat only)
-    def axis_relevance_note(angle):
         if angle < 30:
-            return "sentences differ mainly along the pole spectrum"
         elif angle < 60:
-            return "sentences differ partly along the spectrum, partly on other dimensions"
         else:
-            return "sentences differ mainly on dimensions unrelated to this spectrum"
-    note_d1 = axis_relevance_note(ang_d1)
-    note_d2 = axis_relevance_note(ang_d2)
-    # Overall verdict
     closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
     closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
     if gap < 0.05:
-        verdict = (f"No clear difference: {name_d1} and {name_d2} occupy very "
-                   f"similar positions on the {name_a}↔{name_b} spectrum.")
     else:
-        verdict = (f"{closer_to_a} aligns more closely with {name_a}; "
-                   f"{closer_to_b} aligns more closely with {name_b}. "
-                   f"There is {gap_desc} between them ({gap:.0%} of the full spectrum).")
-    # Caveats
     caveats = []
-    if sep_word == "weak":
-        caveats.append(f"Pole separation is weak — the two poles are not very distinct in meaning space. "
-                       f"Try adding more contrasting sentences to each pole.")
-    if bread_d1 > bread_b and bread_d1 > bread_a:
-        caveats.append(f"{name_d1} is more wide-ranging than either pole corpus — "
-                       f"its position score averages over quite varied content.")
-    if bread_d2 > bread_b and bread_d2 > bread_a:
-        caveats.append(f"{name_d2} is more wide-ranging than either pole corpus — "
-                       f"its position score averages over quite varied content.")
     W = 62
     report_lines = [
@@ -457,13 +484,15 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
         f"{'═' * W}",
         f"",
         f"  AXIS:  {name_a}  ←{'─' * 16}→  {name_b}",
-        f"  Pole separation: {sep_word} — {sep_note}",
-        f"  ({na} sentences in {name_a} pole · {nb} in {name_b} pole)",
         f"",
         f"{'─' * W}",
-        f"  WHERE EACH TEXT SITS ON THE SPECTRUM",
         f"{'─' * W}",
-        f"  0% = {name_a} pole          100% = {name_b} pole",
         f"",
         f"  {name_a} pole",
         f"  {'░' * 20}●{'░' * 20}  (0%)",
@@ -479,39 +508,45 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
         f"  {name_b} pole",
         f"  {'░' * 20}●{'░' * 20}  (100%)",
         f"",
-        f"  Gap between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
-        f"  → {gap_desc.capitalize()}.",
         f"",
         f"{'─' * W}",
-        f"  HOW RELIABLY DO THE SENTENCES CLUSTER?",
         f"{'─' * W}",
-        f"  A tight cluster means all sentences point in the same",
-        f"  direction — the position score is a reliable summary.",
-        f"  A loose cluster means sentences pull in different",
-        f"  directions — the score is an average and less decisive.",
         f"",
-        f"  {name_d1}:  {rel_d1}.",
-        f"  {name_d2}:  {rel_d2}.",
         f"",
-        f"  For reference — how wide-ranging are the pole corpora?",
-        f"  {name_a} pole:  {breadth_label(bread_a, all_breads)}",
-        f"  {name_b} pole:  {breadth_label(bread_b, all_breads)}",
         f"",
         f"{'─' * W}",
-        f"  AXIS ALIGNMENT NOTE",
         f"{'─' * W}",
-        f"  Do sentences within each text vary along the pole",
-        f"  spectrum, or mainly on unrelated dimensions?",
         f"",
-        f"  {name_d1}:  {note_d1}.",
-        f"  {name_d2}:  {note_d2}.",
         f"",
     ]
     if caveats:
         report_lines += [
             f"{'─' * W}",
-            f"  ⚠  CAVEATS",
             f"{'─' * W}",
         ]
         for c in caveats:
@@ -525,9 +560,9 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
         f"  {verdict}",
         f"",
         f"{'═' * W}",
-        f"  All measurements use the full {MODEL_DIM}-dimensional meaning",
-        f"  space of {MODEL_NAME}. The 3D map is a simplified view",
-        f"  for visual orientation — rotate and zoom it above.",
         f"{'═' * W}",
     ]
     report = "\n".join(report_lines)

     # ── Build plain-language report ───────────────────────────────────────
+    # 1. Axis discriminability
     pole_cos = float(cosine(heart_a, heart_b))
     if pole_cos > 0.4:
+        sep_word = "well-defined"
+        sep_note = (f"The two poles occupy clearly distinct regions of meaning "
+                    f"space — the axis is a reliable discriminator.")
     elif pole_cos > 0.2:
+        sep_word = "adequately defined"
+        sep_note = (f"The two poles are sufficiently distinct for meaningful "
+                    f"comparison. Adding more exemplar sentences to each pole "
+                    f"would sharpen the axis further.")
     else:
+        sep_word = "weakly defined"
+        sep_note = (f"The two poles overlap considerably in meaning space. "
+                    f"Consider replacing some exemplar sentences with more "
+                    f"clearly contrasting examples.")
+    # 2. Position bar
     def position_bar(pct, width=40):
         """Draw a text track of ░ cells with a single ● at the given fraction.

         The fraction *pct* is first limited to 0..1, then multiplied by
         *width* and rounded to pick how many ░ cells go before the marker;
         the rest of the *width* cells go after it.
         """
         fraction = max(0, min(1, pct))
         cells_before = int(round(fraction * width))
         cells_after = width - cells_before
         return "".join(("░" * cells_before, "●", "░" * cells_after))
+    # 3. Position description
     def position_desc(pct, na, nb):
         if pct <= 0.10:
+            return f"strongly oriented toward {na}"
         elif pct <= 0.30:
+            return f"predominantly oriented toward {na}"
         elif pct <= 0.45:
+            return f"leaning toward {na}, with some features of {nb}"
         elif pct <= 0.55:
+            return f"positioned midway — drawing on both {na} and {nb} framings"
         elif pct <= 0.70:
+            return f"leaning toward {nb}, with some features of {na}"
         elif pct <= 0.90:
+            return f"predominantly oriented toward {nb}"
         else:
+            return f"strongly oriented toward {nb}"
     desc_d1 = position_desc(pct_d1, name_a, name_b)
     desc_d2 = position_desc(pct_d2, name_a, name_b)
+    # 4. Separation between the two texts
     gap = abs(pct_d1 - pct_d2)
     if gap < 0.05:
+        gap_desc = "no discernible difference in discourse orientation"
+        gap_interp = ("The two texts occupy virtually the same position on this "
+                      "axis — they share the same overall framing.")
     elif gap < 0.15:
+        gap_desc = "a small but detectable difference in discourse orientation"
+        gap_interp = ("The two texts lean in different directions but remain "
+                      "close — the framing contrast is subtle.")
     elif gap < 0.30:
+        gap_desc = "a clear difference in discourse orientation"
+        gap_interp = ("The two texts show a meaningful difference in how they "
+                      "frame their subject matter relative to this axis.")
     elif gap < 0.50:
+        gap_desc = "a substantial difference in discourse orientation"
+        gap_interp = ("The two texts are clearly positioned on different sides "
+                      "of this axis — their framings are genuinely divergent.")
     else:
+        gap_desc = "a very large difference in discourse orientation"
+        gap_interp = ("The two texts sit at opposite ends of the spectrum — "
+                      "their underlying value orientations are strongly contrasting.")
+    # 5. Internal discourse coherence (thematic spread)
+    def coherence_label(spread, all_spreads):
         mn, mx = min(all_spreads), max(all_spreads)
         r = (spread - mn) / (mx - mn) if mx > mn else 0.5
         if r < 0.25:
+            return ("highly coherent — sentences cluster tightly, suggesting "
+                    "a consistent and focused discourse style")
         elif r < 0.50:
+            return ("moderately coherent — sentences share a common orientation "
+                    "while covering a range of topics")
         elif r < 0.75:
+            return ("thematically varied — sentences range across several "
+                    "sub-topics, which is typical of a multi-section text")
         else:
+            return ("thematically broad — sentences span a wide range of "
+                    "sub-topics, each contributing its own framing to the average")
+    coh_d1 = coherence_label(bread_d1, all_breads)
+    coh_d2 = coherence_label(bread_d2, all_breads)
+    coh_a  = coherence_label(bread_a,  all_breads)
+    coh_b  = coherence_label(bread_b,  all_breads)
+    # 6. Discursive scope (does the text vary along THIS axis, or others?)
+    def scope_label(angle):
         if angle < 30:
+            return ("variation within this text is primarily along this axis — "
+                    "the axis captures the main dimension of internal contrast")
         elif angle < 60:
+            return ("variation within this text runs partly along this axis and "
+                    "partly along other semantic dimensions — the axis is one of "
+                    "several active in this discourse")
         else:
+            return ("variation within this text runs mostly along dimensions "
+                    "other than this axis — sentences differ from each other "
+                    "primarily on topics or registers not captured here, while "
+                    "sharing a broadly consistent orientation on this spectrum")
+    scope_d1 = scope_label(ang_d1)
+    scope_d2 = scope_label(ang_d2)
+    # 7. Overall verdict
     closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
     closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
     if gap < 0.05:
+        verdict = (f"No clear discursive difference: {name_d1} and {name_d2} "
+                   f"occupy essentially the same position on the "
+                   f"{name_a}↔{name_b} spectrum.")
     else:
+        verdict = (f"{closer_to_a} is more strongly oriented toward {name_a} "
+                   f"discourse; {closer_to_b} toward {name_b} discourse. "
+                   f"The separation between them ({gap:.0%} of the full spectrum) "
+                   f"represents {gap_desc}.")
+    # 8. Only flag genuinely problematic cases
     caveats = []
+    if sep_word == "weakly defined":
+        caveats.append(
+            f"The axis is weakly defined: the {name_a} and {name_b} pole "
+            f"corpora are not sufficiently distinct in meaning space. "
+            f"Results should be treated with caution — consider revising "
+            f"or extending the exemplar sentences for each pole."
+        )
     W = 62
     report_lines = [
         f"{'═' * W}",
         f"",
         f"  AXIS:  {name_a}  ←{'─' * 16}→  {name_b}",
+        f"  Axis quality: {sep_word}",
+        f"  {sep_note}",
+        f"  ({na} exemplar sentences at {name_a} pole · {nb} at {name_b} pole)",
         f"",
         f"{'─' * W}",
+        f"  DISCOURSE ORIENTATION",
         f"{'─' * W}",
+        f"  How far along the spectrum does each text sit?",
+        f"  Left = {name_a}   Right = {name_b}",
         f"",
         f"  {name_a} pole",
         f"  {'░' * 20}●{'░' * 20}  (0%)",
         f"  {name_b} pole",
         f"  {'░' * 20}●{'░' * 20}  (100%)",
         f"",
+        f"  Distance between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
+        f"  → {gap_interp}",
         f"",
         f"{'─' * W}",
+        f"  INTERNAL DISCOURSE COHERENCE",
         f"{'─' * W}",
+        f"  How consistent is the framing within each text?",
+        f"  A tightly coherent text speaks with one voice on this axis.",
+        f"  A thematically broad text covers many sub-topics, each",
+        f"  contributing its own framing — both patterns are linguistically",
+        f"  meaningful, not errors.",
         f"",
+        f"  {name_d1}:  {coh_d1}.",
+        f"  {name_d2}:  {coh_d2}.",
         f"",
+        f"  For reference — coherence of the pole corpora:",
+        f"  {name_a} pole:  {coh_a}.",
+        f"  {name_b} pole:  {coh_b}.",
         f"",
         f"{'─' * W}",
+        f"  DISCURSIVE SCOPE",
         f"{'─' * W}",
+        f"  Along which dimensions do sentences within each text vary?",
+        f"  This reveals whether this axis captures the main source of",
+        f"  internal contrast, or whether the text is doing more things",
+        f"  at once than a single axis can describe.",
         f"",
+        f"  {name_d1}:",
+        f"  {scope_d1}.",
+        f"",
+        f"  {name_d2}:",
+        f"  {scope_d2}.",
         f"",
     ]
     if caveats:
         report_lines += [
             f"{'─' * W}",
+            f"  ⚠  NOTE",
             f"{'─' * W}",
         ]
         for c in caveats:
         f"  {verdict}",
         f"",
         f"{'═' * W}",
+        f"  Scores are computed in the full {MODEL_DIM}-dimensional semantic",
+        f"  space of {MODEL_NAME}. The 3D map above is a",
+        f"  dimensionality-reduced view for visual orientation only.",
         f"{'═' * W}",
     ]
     report = "\n".join(report_lines)