neovalle committed on
Commit
9d334e5
·
verified ·
1 Parent(s): 2ce0748

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -78
app.py CHANGED
@@ -347,108 +347,135 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
347
 
348
  # ── Build plain-language report ───────────────────────────────────────
349
 
350
- # Pole separation quality
351
  pole_cos = float(cosine(heart_a, heart_b))
352
  if pole_cos > 0.4:
353
- sep_word = "strong"
354
- sep_note = "The two poles are clearly distinct — results are reliable."
 
355
  elif pole_cos > 0.2:
356
- sep_word = "moderate"
357
- sep_note = "The poles are reasonably distinct — results are meaningful."
 
 
358
  else:
359
- sep_word = "weak"
360
- sep_note = "The poles are quite similar — consider using more contrasting sentences."
 
 
361
 
362
- # Position bar (pole A = left anchor, pole B = right anchor)
363
  def position_bar(pct, width=40):
364
  pos = max(0, min(1, pct))
365
  idx = int(round(pos * width))
366
  bar = "░" * idx + "●" + "░" * (width - idx)
367
  return bar
368
 
369
- # Plain position description
370
  def position_desc(pct, na, nb):
371
  if pct <= 0.10:
372
- return f"very close to the {na} pole"
373
  elif pct <= 0.30:
374
- return f"closer to {na}"
375
  elif pct <= 0.45:
376
- return f"slightly leaning toward {na}"
377
  elif pct <= 0.55:
378
- return f"roughly midway between {na} and {nb}"
379
  elif pct <= 0.70:
380
- return f"slightly leaning toward {nb}"
381
  elif pct <= 0.90:
382
- return f"closer to {nb}"
383
  else:
384
- return f"very close to the {nb} pole"
385
 
386
  desc_d1 = position_desc(pct_d1, name_a, name_b)
387
  desc_d2 = position_desc(pct_d2, name_a, name_b)
388
 
389
- # Gap between texts
390
  gap = abs(pct_d1 - pct_d2)
391
  if gap < 0.05:
392
- gap_desc = "no meaningful difference in position"
 
 
393
  elif gap < 0.15:
394
- gap_desc = "a small difference in position"
 
 
395
  elif gap < 0.30:
396
- gap_desc = "a moderate difference in position"
 
 
397
  elif gap < 0.50:
398
- gap_desc = "a substantial difference in position"
 
 
399
  else:
400
- gap_desc = "a very large difference in position"
 
 
401
 
402
- # Cluster tightness as reliability
403
- def reliability_label(spread, all_spreads):
404
  mn, mx = min(all_spreads), max(all_spreads)
405
  r = (spread - mn) / (mx - mn) if mx > mn else 0.5
406
  if r < 0.25:
407
- return "very consistent — position score is highly reliable"
 
408
  elif r < 0.50:
409
- return "fairly consistent — position score is reliable"
 
410
  elif r < 0.75:
411
- return "somewhat varied — position score is an average across different angles"
 
412
  else:
413
- return "wide-ranging — position score averages over quite different sentences"
 
414
 
415
- rel_d1 = reliability_label(bread_d1, all_breads)
416
- rel_d2 = reliability_label(bread_d2, all_breads)
 
 
417
 
418
- # Axis relevance (brief caveat only)
419
- def axis_relevance_note(angle):
420
  if angle < 30:
421
- return "sentences differ mainly along the pole spectrum"
 
422
  elif angle < 60:
423
- return "sentences differ partly along the spectrum, partly on other dimensions"
 
 
424
  else:
425
- return "sentences differ mainly on dimensions unrelated to this spectrum"
 
 
 
426
 
427
- note_d1 = axis_relevance_note(ang_d1)
428
- note_d2 = axis_relevance_note(ang_d2)
429
 
430
- # Overall verdict
431
  closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
432
  closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
433
  if gap < 0.05:
434
- verdict = (f"No clear difference: {name_d1} and {name_d2} occupy very "
435
- f"similar positions on the {name_a}↔{name_b} spectrum.")
 
436
  else:
437
- verdict = (f"{closer_to_a} aligns more closely with {name_a}; "
438
- f"{closer_to_b} aligns more closely with {name_b}. "
439
- f"There is {gap_desc} between them ({gap:.0%} of the full spectrum).")
 
440
 
441
- # Caveats
442
  caveats = []
443
- if sep_word == "weak":
444
- caveats.append(f"Pole separation is weak — the two poles are not very distinct in meaning space. "
445
- f"Try adding more contrasting sentences to each pole.")
446
- if bread_d1 > bread_b and bread_d1 > bread_a:
447
- caveats.append(f"{name_d1} is more wide-ranging than either pole corpus — "
448
- f"its position score averages over quite varied content.")
449
- if bread_d2 > bread_b and bread_d2 > bread_a:
450
- caveats.append(f"{name_d2} is more wide-ranging than either pole corpus — "
451
- f"its position score averages over quite varied content.")
452
 
453
  W = 62
454
  report_lines = [
@@ -457,13 +484,15 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
457
  f"{'═' * W}",
458
  f"",
459
  f" AXIS: {name_a} ←{'─' * 16}→ {name_b}",
460
- f" Pole separation: {sep_word} — {sep_note}",
461
- f" ({na} sentences in {name_a} pole · {nb} in {name_b} pole)",
 
462
  f"",
463
  f"{'─' * W}",
464
- f" WHERE EACH TEXT SITS ON THE SPECTRUM",
465
  f"{'─' * W}",
466
- f" 0% = {name_a} pole 100% = {name_b} pole",
 
467
  f"",
468
  f" {name_a} pole",
469
  f" {'░' * 20}●{'░' * 20} (0%)",
@@ -479,39 +508,45 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
479
  f" {name_b} pole",
480
  f" {'░' * 20}●{'░' * 20} (100%)",
481
  f"",
482
- f" Gap between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
483
- f" → {gap_desc.capitalize()}.",
484
  f"",
485
  f"{'─' * W}",
486
- f" HOW RELIABLY DO THE SENTENCES CLUSTER?",
487
  f"{'─' * W}",
488
- f" A tight cluster means all sentences point in the same",
489
- f" direction — the position score is a reliable summary.",
490
- f" A loose cluster means sentences pull in different",
491
- f" directions — the score is an average and less decisive.",
 
492
  f"",
493
- f" {name_d1}: {rel_d1}.",
494
- f" {name_d2}: {rel_d2}.",
495
  f"",
496
- f" For reference — how wide-ranging are the pole corpora?",
497
- f" {name_a} pole: {breadth_label(bread_a, all_breads)}",
498
- f" {name_b} pole: {breadth_label(bread_b, all_breads)}",
499
  f"",
500
  f"{'─' * W}",
501
- f" AXIS ALIGNMENT NOTE",
502
  f"{'─' * W}",
503
- f" Do sentences within each text vary along the pole",
504
- f" spectrum, or mainly on unrelated dimensions?",
 
 
505
  f"",
506
- f" {name_d1}: {note_d1}.",
507
- f" {name_d2}: {note_d2}.",
 
 
 
508
  f"",
509
  ]
510
 
511
  if caveats:
512
  report_lines += [
513
  f"{'─' * W}",
514
- f" ⚠ CAVEATS",
515
  f"{'─' * W}",
516
  ]
517
  for c in caveats:
@@ -525,9 +560,9 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
525
  f" {verdict}",
526
  f"",
527
  f"{'═' * W}",
528
- f" All measurements use the full {MODEL_DIM}-dimensional meaning",
529
- f" space of {MODEL_NAME}. The 3D map is a simplified view",
530
- f" for visual orientation — rotate and zoom it above.",
531
  f"{'═' * W}",
532
  ]
533
  report = "\n".join(report_lines)
 
347
 
348
  # ── Build plain-language report ───────────────────────────────────────
349
 
350
+ # 1. Axis discriminability
351
  pole_cos = float(cosine(heart_a, heart_b))
352
  if pole_cos > 0.4:
353
+ sep_word = "well-defined"
354
+ sep_note = (f"The two poles occupy clearly distinct regions of meaning "
355
+ f"space — the axis is a reliable discriminator.")
356
  elif pole_cos > 0.2:
357
+ sep_word = "adequately defined"
358
+ sep_note = (f"The two poles are sufficiently distinct for meaningful "
359
+ f"comparison. Adding more exemplar sentences to each pole "
360
+ f"would sharpen the axis further.")
361
  else:
362
+ sep_word = "weakly defined"
363
+ sep_note = (f"The two poles overlap considerably in meaning space. "
364
+ f"Consider replacing some exemplar sentences with more "
365
+ f"clearly contrasting examples.")
366
 
367
+ # 2. Position bar
368
  def position_bar(pct, width=40):
369
  pos = max(0, min(1, pct))
370
  idx = int(round(pos * width))
371
  bar = "░" * idx + "●" + "░" * (width - idx)
372
  return bar
373
 
374
+ # 3. Position description
375
  def position_desc(pct, na, nb):
376
  if pct <= 0.10:
377
+ return f"strongly oriented toward {na}"
378
  elif pct <= 0.30:
379
+ return f"predominantly oriented toward {na}"
380
  elif pct <= 0.45:
381
+ return f"leaning toward {na}, with some features of {nb}"
382
  elif pct <= 0.55:
383
+ return f"positioned midway — drawing on both {na} and {nb} framings"
384
  elif pct <= 0.70:
385
+ return f"leaning toward {nb}, with some features of {na}"
386
  elif pct <= 0.90:
387
+ return f"predominantly oriented toward {nb}"
388
  else:
389
+ return f"strongly oriented toward {nb}"
390
 
391
  desc_d1 = position_desc(pct_d1, name_a, name_b)
392
  desc_d2 = position_desc(pct_d2, name_a, name_b)
393
 
394
+ # 4. Separation between the two texts
395
  gap = abs(pct_d1 - pct_d2)
396
  if gap < 0.05:
397
+ gap_desc = "no discernible difference in discourse orientation"
398
+ gap_interp = ("The two texts occupy virtually the same position on this "
399
+ "axis — they share the same overall framing.")
400
  elif gap < 0.15:
401
+ gap_desc = "a small but detectable difference in discourse orientation"
402
+ gap_interp = ("The two texts lean in different directions but remain "
403
+ "close — the framing contrast is subtle.")
404
  elif gap < 0.30:
405
+ gap_desc = "a clear difference in discourse orientation"
406
+ gap_interp = ("The two texts show a meaningful difference in how they "
407
+ "frame their subject matter relative to this axis.")
408
  elif gap < 0.50:
409
+ gap_desc = "a substantial difference in discourse orientation"
410
+ gap_interp = ("The two texts are clearly positioned on different sides "
411
+ "of this axis — their framings are genuinely divergent.")
412
  else:
413
+ gap_desc = "a very large difference in discourse orientation"
414
+ gap_interp = ("The two texts sit at opposite ends of the spectrum — "
415
+ "their underlying value orientations are strongly contrasting.")
416
 
417
+ # 5. Internal discourse coherence (thematic spread)
418
+ def coherence_label(spread, all_spreads):
419
  mn, mx = min(all_spreads), max(all_spreads)
420
  r = (spread - mn) / (mx - mn) if mx > mn else 0.5
421
  if r < 0.25:
422
+ return ("highly coherent — sentences cluster tightly, suggesting "
423
+ "a consistent and focused discourse style")
424
  elif r < 0.50:
425
+ return ("moderately coherent — sentences share a common orientation "
426
+ "while covering a range of topics")
427
  elif r < 0.75:
428
+ return ("thematically varied — sentences range across several "
429
+ "sub-topics, which is typical of a multi-section text")
430
  else:
431
+ return ("thematically broad — sentences span a wide range of "
432
+ "sub-topics, each contributing its own framing to the average")
433
 
434
+ coh_d1 = coherence_label(bread_d1, all_breads)
435
+ coh_d2 = coherence_label(bread_d2, all_breads)
436
+ coh_a = coherence_label(bread_a, all_breads)
437
+ coh_b = coherence_label(bread_b, all_breads)
438
 
439
+ # 6. Discursive scope (does the text vary along THIS axis, or others?)
440
+ def scope_label(angle):
441
  if angle < 30:
442
+ return ("variation within this text is primarily along this axis — "
443
+ "the axis captures the main dimension of internal contrast")
444
  elif angle < 60:
445
+ return ("variation within this text runs partly along this axis and "
446
+ "partly along other semantic dimensions — the axis is one of "
447
+ "several active in this discourse")
448
  else:
449
+ return ("variation within this text runs mostly along dimensions "
450
+ "other than this axis — sentences differ from each other "
451
+ "primarily on topics or registers not captured here, while "
452
+ "sharing a broadly consistent orientation on this spectrum")
453
 
454
+ scope_d1 = scope_label(ang_d1)
455
+ scope_d2 = scope_label(ang_d2)
456
 
457
+ # 7. Overall verdict
458
  closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
459
  closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
460
  if gap < 0.05:
461
+ verdict = (f"No clear discursive difference: {name_d1} and {name_d2} "
462
+ f"occupy essentially the same position on the "
463
+ f"{name_a}↔{name_b} spectrum.")
464
  else:
465
+ verdict = (f"{closer_to_a} is more strongly oriented toward {name_a} "
466
+ f"discourse; {closer_to_b} toward {name_b} discourse. "
467
+ f"The separation between them ({gap:.0%} of the full spectrum) "
468
+ f"represents {gap_desc}.")
469
 
470
+ # 8. Only flag genuinely problematic cases
471
  caveats = []
472
+ if sep_word == "weakly defined":
473
+ caveats.append(
474
+ f"The axis is weakly defined: the {name_a} and {name_b} pole "
475
+ f"corpora are not sufficiently distinct in meaning space. "
476
+ f"Results should be treated with caution — consider revising "
477
+ f"or extending the exemplar sentences for each pole."
478
+ )
 
 
479
 
480
  W = 62
481
  report_lines = [
 
484
  f"{'═' * W}",
485
  f"",
486
  f" AXIS: {name_a} ←{'─' * 16}→ {name_b}",
487
+ f" Axis quality: {sep_word}",
488
+ f" {sep_note}",
489
+ f" ({na} exemplar sentences at {name_a} pole · {nb} at {name_b} pole)",
490
  f"",
491
  f"{'─' * W}",
492
+ f" DISCOURSE ORIENTATION",
493
  f"{'─' * W}",
494
+ f" How far along the spectrum does each text sit?",
495
+ f" Left = {name_a} Right = {name_b}",
496
  f"",
497
  f" {name_a} pole",
498
  f" {'░' * 20}●{'░' * 20} (0%)",
 
508
  f" {name_b} pole",
509
  f" {'░' * 20}●{'░' * 20} (100%)",
510
  f"",
511
+ f" Distance between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
512
+ f" → {gap_interp}",
513
  f"",
514
  f"{'─' * W}",
515
+ f" INTERNAL DISCOURSE COHERENCE",
516
  f"{'─' * W}",
517
+ f" How consistent is the framing within each text?",
518
+ f" A tightly coherent text speaks with one voice on this axis.",
519
+ f" A thematically broad text covers many sub-topics, each",
520
+ f" contributing its own framing — both patterns are linguistically",
521
+ f" meaningful, not errors.",
522
  f"",
523
+ f" {name_d1}: {coh_d1}.",
524
+ f" {name_d2}: {coh_d2}.",
525
  f"",
526
+ f" For reference — coherence of the pole corpora:",
527
+ f" {name_a} pole: {coh_a}.",
528
+ f" {name_b} pole: {coh_b}.",
529
  f"",
530
  f"{'─' * W}",
531
+ f" DISCURSIVE SCOPE",
532
  f"{'─' * W}",
533
+ f" Along which dimensions do sentences within each text vary?",
534
+ f" This reveals whether this axis captures the main source of",
535
+ f" internal contrast, or whether the text is doing more things",
536
+ f" at once than a single axis can describe.",
537
  f"",
538
+ f" {name_d1}:",
539
+ f" {scope_d1}.",
540
+ f"",
541
+ f" {name_d2}:",
542
+ f" {scope_d2}.",
543
  f"",
544
  ]
545
 
546
  if caveats:
547
  report_lines += [
548
  f"{'─' * W}",
549
+ f" ⚠ NOTE",
550
  f"{'─' * W}",
551
  ]
552
  for c in caveats:
 
560
  f" {verdict}",
561
  f"",
562
  f"{'═' * W}",
563
+ f" Scores are computed in the full {MODEL_DIM}-dimensional semantic",
564
+ f" space of {MODEL_NAME}. The 3D map above is a",
565
+ f" dimensionality-reduced view for visual orientation only.",
566
  f"{'═' * W}",
567
  ]
568
  report = "\n".join(report_lines)