Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -347,108 +347,135 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
|
|
| 347 |
|
| 348 |
# ── Build plain-language report ───────────────────────────────────────
|
| 349 |
|
| 350 |
-
#
|
| 351 |
pole_cos = float(cosine(heart_a, heart_b))
|
| 352 |
if pole_cos > 0.4:
|
| 353 |
-
sep_word = "
|
| 354 |
-
sep_note = "The two poles
|
|
|
|
| 355 |
elif pole_cos > 0.2:
|
| 356 |
-
sep_word = "
|
| 357 |
-
sep_note = "The poles are
|
|
|
|
|
|
|
| 358 |
else:
|
| 359 |
-
sep_word = "
|
| 360 |
-
sep_note = "The poles
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
# Position bar
|
| 363 |
def position_bar(pct, width=40, *, track="─", marker="●"):
    """Render a one-line text gauge with a marker at fraction *pct*.

    Args:
        pct: Position along the axis. Values outside [0, 1] are clamped.
        width: Number of track cells. With single-character glyphs the
            result is ``width + 1`` characters long, because the marker
            occupies a cell of its own.
        track: Glyph repeated on either side of the marker.
        marker: Glyph drawn at the computed position.

    Returns:
        The rendered bar as a string.
    """
    # NOTE(review): in the scraped page all three glyphs had decayed to the
    # same mojibake character, which makes the marker invisible. The
    # defaults here are presumed reconstructions; confirm them against the
    # upstream app.py before relying on the exact characters.
    pos = max(0, min(1, pct))
    # round() (not floor) so the marker snaps to the nearest cell.
    idx = int(round(pos * width))
    return track * idx + marker + track * (width - idx)
|
| 368 |
|
| 369 |
-
#
|
| 370 |
def position_desc(pct, na, nb):
|
| 371 |
if pct <= 0.10:
|
| 372 |
-
return f"
|
| 373 |
elif pct <= 0.30:
|
| 374 |
-
return f"
|
| 375 |
elif pct <= 0.45:
|
| 376 |
-
return f"
|
| 377 |
elif pct <= 0.55:
|
| 378 |
-
return f"
|
| 379 |
elif pct <= 0.70:
|
| 380 |
-
return f"
|
| 381 |
elif pct <= 0.90:
|
| 382 |
-
return f"
|
| 383 |
else:
|
| 384 |
-
return f"
|
| 385 |
|
| 386 |
desc_d1 = position_desc(pct_d1, name_a, name_b)
|
| 387 |
desc_d2 = position_desc(pct_d2, name_a, name_b)
|
| 388 |
|
| 389 |
-
#
|
| 390 |
gap = abs(pct_d1 - pct_d2)
|
| 391 |
if gap < 0.05:
|
| 392 |
-
gap_desc = "no
|
|
|
|
|
|
|
| 393 |
elif gap < 0.15:
|
| 394 |
-
gap_desc = "a small difference in
|
|
|
|
|
|
|
| 395 |
elif gap < 0.30:
|
| 396 |
-
gap_desc = "a
|
|
|
|
|
|
|
| 397 |
elif gap < 0.50:
|
| 398 |
-
gap_desc = "a substantial difference in
|
|
|
|
|
|
|
| 399 |
else:
|
| 400 |
-
gap_desc = "a very large difference in
|
|
|
|
|
|
|
| 401 |
|
| 402 |
-
#
|
| 403 |
-
def
|
| 404 |
mn, mx = min(all_spreads), max(all_spreads)
|
| 405 |
r = (spread - mn) / (mx - mn) if mx > mn else 0.5
|
| 406 |
if r < 0.25:
|
| 407 |
-
return "
|
|
|
|
| 408 |
elif r < 0.50:
|
| 409 |
-
return "
|
|
|
|
| 410 |
elif r < 0.75:
|
| 411 |
-
return "
|
|
|
|
| 412 |
else:
|
| 413 |
-
return "
|
|
|
|
| 414 |
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
| 417 |
|
| 418 |
-
#
|
| 419 |
-
def
|
| 420 |
if angle < 30:
|
| 421 |
-
return "
|
|
|
|
| 422 |
elif angle < 60:
|
| 423 |
-
return "
|
|
|
|
|
|
|
| 424 |
else:
|
| 425 |
-
return "
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
|
| 430 |
-
# Overall verdict
|
| 431 |
closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
|
| 432 |
closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
|
| 433 |
if gap < 0.05:
|
| 434 |
-
verdict = (f"No clear difference: {name_d1} and {name_d2}
|
| 435 |
-
f"
|
|
|
|
| 436 |
else:
|
| 437 |
-
verdict = (f"{closer_to_a}
|
| 438 |
-
f"{closer_to_b}
|
| 439 |
-
f"
|
|
|
|
| 440 |
|
| 441 |
-
#
|
| 442 |
caveats = []
|
| 443 |
-
if sep_word == "
|
| 444 |
-
caveats.append(
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
caveats.append(f"{name_d2} is more wide-ranging than either pole corpus β "
|
| 451 |
-
f"its position score averages over quite varied content.")
|
| 452 |
|
| 453 |
W = 62
|
| 454 |
report_lines = [
|
|
@@ -457,13 +484,15 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
|
|
| 457 |
f"{'β' * W}",
|
| 458 |
f"",
|
| 459 |
f" AXIS: {name_a} β{'β' * 16}β {name_b}",
|
| 460 |
-
f"
|
| 461 |
-
f"
|
|
|
|
| 462 |
f"",
|
| 463 |
f"{'β' * W}",
|
| 464 |
-
f"
|
| 465 |
f"{'β' * W}",
|
| 466 |
-
f"
|
|
|
|
| 467 |
f"",
|
| 468 |
f" {name_a} pole",
|
| 469 |
f" {'β' * 20}β{'β' * 20} (0%)",
|
|
@@ -479,39 +508,45 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
|
|
| 479 |
f" {name_b} pole",
|
| 480 |
f" {'β' * 20}β{'β' * 20} (100%)",
|
| 481 |
f"",
|
| 482 |
-
f"
|
| 483 |
-
f" β {
|
| 484 |
f"",
|
| 485 |
f"{'β' * W}",
|
| 486 |
-
f"
|
| 487 |
f"{'β' * W}",
|
| 488 |
-
f"
|
| 489 |
-
f"
|
| 490 |
-
f" A
|
| 491 |
-
f"
|
|
|
|
| 492 |
f"",
|
| 493 |
-
f" {name_d1}: {
|
| 494 |
-
f" {name_d2}: {
|
| 495 |
f"",
|
| 496 |
-
f" For reference β
|
| 497 |
-
f" {name_a} pole: {
|
| 498 |
-
f" {name_b} pole: {
|
| 499 |
f"",
|
| 500 |
f"{'β' * W}",
|
| 501 |
-
f"
|
| 502 |
f"{'β' * W}",
|
| 503 |
-
f"
|
| 504 |
-
f"
|
|
|
|
|
|
|
| 505 |
f"",
|
| 506 |
-
f" {name_d1}:
|
| 507 |
-
f" {
|
|
|
|
|
|
|
|
|
|
| 508 |
f"",
|
| 509 |
]
|
| 510 |
|
| 511 |
if caveats:
|
| 512 |
report_lines += [
|
| 513 |
f"{'β' * W}",
|
| 514 |
-
f" β
|
| 515 |
f"{'β' * W}",
|
| 516 |
]
|
| 517 |
for c in caveats:
|
|
@@ -525,9 +560,9 @@ def run_analysis(text_a, text_b, text_d1, text_d2,
|
|
| 525 |
f" {verdict}",
|
| 526 |
f"",
|
| 527 |
f"{'β' * W}",
|
| 528 |
-
f"
|
| 529 |
-
f" space of {MODEL_NAME}. The 3D map is a
|
| 530 |
-
f" for visual orientation
|
| 531 |
f"{'β' * W}",
|
| 532 |
]
|
| 533 |
report = "\n".join(report_lines)
|
|
|
|
| 347 |
|
| 348 |
# ── Build plain-language report ───────────────────────────────────────
|
| 349 |
|
| 350 |
+
# 1. Axis discriminability
|
| 351 |
pole_cos = float(cosine(heart_a, heart_b))
|
| 352 |
if pole_cos > 0.4:
|
| 353 |
+
sep_word = "well-defined"
|
| 354 |
+
sep_note = (f"The two poles occupy clearly distinct regions of meaning "
|
| 355 |
+
f"space β the axis is a reliable discriminator.")
|
| 356 |
elif pole_cos > 0.2:
|
| 357 |
+
sep_word = "adequately defined"
|
| 358 |
+
sep_note = (f"The two poles are sufficiently distinct for meaningful "
|
| 359 |
+
f"comparison. Adding more exemplar sentences to each pole "
|
| 360 |
+
f"would sharpen the axis further.")
|
| 361 |
else:
|
| 362 |
+
sep_word = "weakly defined"
|
| 363 |
+
sep_note = (f"The two poles overlap considerably in meaning space. "
|
| 364 |
+
f"Consider replacing some exemplar sentences with more "
|
| 365 |
+
f"clearly contrasting examples.")
|
| 366 |
|
| 367 |
+
# 2. Position bar
|
| 368 |
def position_bar(pct, width=40, *, track="─", marker="●"):
    """Render a one-line text gauge with a marker at fraction *pct*.

    Args:
        pct: Position along the axis. Values outside [0, 1] are clamped.
        width: Number of track cells. With single-character glyphs the
            result is ``width + 1`` characters long, because the marker
            occupies a cell of its own.
        track: Glyph repeated on either side of the marker.
        marker: Glyph drawn at the computed position.

    Returns:
        The rendered bar as a string.
    """
    # NOTE(review): in the scraped page all three glyphs had decayed to the
    # same mojibake character, which makes the marker invisible. The
    # defaults here are presumed reconstructions; confirm them against the
    # upstream app.py before relying on the exact characters.
    pos = max(0, min(1, pct))
    # round() (not floor) so the marker snaps to the nearest cell.
    idx = int(round(pos * width))
    return track * idx + marker + track * (width - idx)
|
| 373 |
|
| 374 |
+
# 3. Position description
|
| 375 |
def position_desc(pct, na, nb):
    """Describe in plain language where a text sits between two poles.

    Args:
        pct: Position along the axis; 0 is the *na* pole, 1 is the *nb* pole.
        na: Display name of the pole at the 0 end.
        nb: Display name of the pole at the 1 end.

    Returns:
        A short phrase naming the band that *pct* falls into. Each band's
        upper bound is inclusive.
    """
    if pct <= 0.10:
        return f"strongly oriented toward {na}"
    if pct <= 0.30:
        return f"predominantly oriented toward {na}"
    if pct <= 0.45:
        return f"leaning toward {na}, with some features of {nb}"
    if pct <= 0.55:
        # NOTE(review): the dash was a mojibake character in the scraped
        # page; an em-dash is the presumed original — confirm upstream.
        return f"positioned midway — drawing on both {na} and {nb} framings"
    if pct <= 0.70:
        return f"leaning toward {nb}, with some features of {na}"
    if pct <= 0.90:
        return f"predominantly oriented toward {nb}"
    return f"strongly oriented toward {nb}"
|
| 390 |
|
| 391 |
desc_d1 = position_desc(pct_d1, name_a, name_b)
|
| 392 |
desc_d2 = position_desc(pct_d2, name_a, name_b)
|
| 393 |
|
| 394 |
+
# 4. Separation between the two texts
|
| 395 |
gap = abs(pct_d1 - pct_d2)
|
| 396 |
if gap < 0.05:
|
| 397 |
+
gap_desc = "no discernible difference in discourse orientation"
|
| 398 |
+
gap_interp = ("The two texts occupy virtually the same position on this "
|
| 399 |
+
"axis β they share the same overall framing.")
|
| 400 |
elif gap < 0.15:
|
| 401 |
+
gap_desc = "a small but detectable difference in discourse orientation"
|
| 402 |
+
gap_interp = ("The two texts lean in different directions but remain "
|
| 403 |
+
"close β the framing contrast is subtle.")
|
| 404 |
elif gap < 0.30:
|
| 405 |
+
gap_desc = "a clear difference in discourse orientation"
|
| 406 |
+
gap_interp = ("The two texts show a meaningful difference in how they "
|
| 407 |
+
"frame their subject matter relative to this axis.")
|
| 408 |
elif gap < 0.50:
|
| 409 |
+
gap_desc = "a substantial difference in discourse orientation"
|
| 410 |
+
gap_interp = ("The two texts are clearly positioned on different sides "
|
| 411 |
+
"of this axis β their framings are genuinely divergent.")
|
| 412 |
else:
|
| 413 |
+
gap_desc = "a very large difference in discourse orientation"
|
| 414 |
+
gap_interp = ("The two texts sit at opposite ends of the spectrum β "
|
| 415 |
+
"their underlying value orientations are strongly contrasting.")
|
| 416 |
|
| 417 |
+
# 5. Internal discourse coherence (thematic spread)
|
| 418 |
+
def coherence_label(spread, all_spreads):
    """Label a spread value relative to the range of all observed spreads.

    The spread is min-max normalised against *all_spreads* and the result
    is mapped onto four quartile bands, from tightly clustered to broad.

    Args:
        spread: The spread value to label.
        all_spreads: Sequence of spread values that defines the range.
            Must be non-empty (``min``/``max`` raise ``ValueError``
            otherwise).

    Returns:
        A descriptive phrase for the band the normalised spread falls into.
    """
    # NOTE(review): the dashes in the labels below were mojibake characters
    # in the scraped page; em-dashes are the presumed originals.
    mn, mx = min(all_spreads), max(all_spreads)
    # When every spread is identical there is no range to normalise
    # against, so fall back to the midpoint.
    r = (spread - mn) / (mx - mn) if mx > mn else 0.5
    if r < 0.25:
        return ("highly coherent — sentences cluster tightly, suggesting "
                "a consistent and focused discourse style")
    if r < 0.50:
        return ("moderately coherent — sentences share a common orientation "
                "while covering a range of topics")
    if r < 0.75:
        return ("thematically varied — sentences range across several "
                "sub-topics, which is typical of a multi-section text")
    return ("thematically broad — sentences span a wide range of "
            "sub-topics, each contributing its own framing to the average")
|
| 433 |
|
| 434 |
+
coh_d1 = coherence_label(bread_d1, all_breads)
|
| 435 |
+
coh_d2 = coherence_label(bread_d2, all_breads)
|
| 436 |
+
coh_a = coherence_label(bread_a, all_breads)
|
| 437 |
+
coh_b = coherence_label(bread_b, all_breads)
|
| 438 |
|
| 439 |
+
# 6. Discursive scope (does the text vary along THIS axis, or others?)
|
| 440 |
+
def scope_label(angle):
    """Describe how a text's internal variation relates to the axis.

    Args:
        angle: Numeric angle compared against the thresholds 30 and 60.
            NOTE(review): presumably degrees between the text's main
            direction of variation and the axis — confirm against the
            caller that computes it.

    Returns:
        A phrase for one of three bands: below 30, below 60, or 60 and
        above.
    """
    if angle < 30:
        # NOTE(review): the dash was a mojibake character in the scraped
        # page; an em-dash is the presumed original — confirm upstream.
        return ("variation within this text is primarily along this axis — "
                "the axis captures the main dimension of internal contrast")
    if angle < 60:
        return ("variation within this text runs partly along this axis and "
                "partly along other semantic dimensions — the axis is one of "
                "several active in this discourse")
    return ("variation within this text runs mostly along dimensions "
            "other than this axis — sentences differ from each other "
            "primarily on topics or registers not captured here, while "
            "sharing a broadly consistent orientation on this spectrum")
|
| 453 |
|
| 454 |
+
scope_d1 = scope_label(ang_d1)
|
| 455 |
+
scope_d2 = scope_label(ang_d2)
|
| 456 |
|
| 457 |
+
# 7. Overall verdict
|
| 458 |
closer_to_a = name_d1 if pct_d1 < pct_d2 else name_d2
|
| 459 |
closer_to_b = name_d2 if pct_d1 < pct_d2 else name_d1
|
| 460 |
if gap < 0.05:
|
| 461 |
+
verdict = (f"No clear discursive difference: {name_d1} and {name_d2} "
|
| 462 |
+
f"occupy essentially the same position on the "
|
| 463 |
+
f"{name_a}β{name_b} spectrum.")
|
| 464 |
else:
|
| 465 |
+
verdict = (f"{closer_to_a} is more strongly oriented toward {name_a} "
|
| 466 |
+
f"discourse; {closer_to_b} toward {name_b} discourse. "
|
| 467 |
+
f"The separation between them ({gap:.0%} of the full spectrum) "
|
| 468 |
+
f"represents {gap_desc}.")
|
| 469 |
|
| 470 |
+
# 8. Only flag genuinely problematic cases
|
| 471 |
caveats = []
|
| 472 |
+
if sep_word == "weakly defined":
|
| 473 |
+
caveats.append(
|
| 474 |
+
f"The axis is weakly defined: the {name_a} and {name_b} pole "
|
| 475 |
+
f"corpora are not sufficiently distinct in meaning space. "
|
| 476 |
+
f"Results should be treated with caution β consider revising "
|
| 477 |
+
f"or extending the exemplar sentences for each pole."
|
| 478 |
+
)
|
|
|
|
|
|
|
| 479 |
|
| 480 |
W = 62
|
| 481 |
report_lines = [
|
|
|
|
| 484 |
f"{'β' * W}",
|
| 485 |
f"",
|
| 486 |
f" AXIS: {name_a} β{'β' * 16}β {name_b}",
|
| 487 |
+
f" Axis quality: {sep_word}",
|
| 488 |
+
f" {sep_note}",
|
| 489 |
+
f" ({na} exemplar sentences at {name_a} pole Β· {nb} at {name_b} pole)",
|
| 490 |
f"",
|
| 491 |
f"{'β' * W}",
|
| 492 |
+
f" DISCOURSE ORIENTATION",
|
| 493 |
f"{'β' * W}",
|
| 494 |
+
f" How far along the spectrum does each text sit?",
|
| 495 |
+
f" Left = {name_a} Right = {name_b}",
|
| 496 |
f"",
|
| 497 |
f" {name_a} pole",
|
| 498 |
f" {'β' * 20}β{'β' * 20} (0%)",
|
|
|
|
| 508 |
f" {name_b} pole",
|
| 509 |
f" {'β' * 20}β{'β' * 20} (100%)",
|
| 510 |
f"",
|
| 511 |
+
f" Distance between {name_d1} and {name_d2}: {gap:.0%} of the spectrum",
|
| 512 |
+
f" β {gap_interp}",
|
| 513 |
f"",
|
| 514 |
f"{'β' * W}",
|
| 515 |
+
f" INTERNAL DISCOURSE COHERENCE",
|
| 516 |
f"{'β' * W}",
|
| 517 |
+
f" How consistent is the framing within each text?",
|
| 518 |
+
f" A tightly coherent text speaks with one voice on this axis.",
|
| 519 |
+
f" A thematically broad text covers many sub-topics, each",
|
| 520 |
+
f" contributing its own framing β both patterns are linguistically",
|
| 521 |
+
f" meaningful, not errors.",
|
| 522 |
f"",
|
| 523 |
+
f" {name_d1}: {coh_d1}.",
|
| 524 |
+
f" {name_d2}: {coh_d2}.",
|
| 525 |
f"",
|
| 526 |
+
f" For reference β coherence of the pole corpora:",
|
| 527 |
+
f" {name_a} pole: {coh_a}.",
|
| 528 |
+
f" {name_b} pole: {coh_b}.",
|
| 529 |
f"",
|
| 530 |
f"{'β' * W}",
|
| 531 |
+
f" DISCURSIVE SCOPE",
|
| 532 |
f"{'β' * W}",
|
| 533 |
+
f" Along which dimensions do sentences within each text vary?",
|
| 534 |
+
f" This reveals whether this axis captures the main source of",
|
| 535 |
+
f" internal contrast, or whether the text is doing more things",
|
| 536 |
+
f" at once than a single axis can describe.",
|
| 537 |
f"",
|
| 538 |
+
f" {name_d1}:",
|
| 539 |
+
f" {scope_d1}.",
|
| 540 |
+
f"",
|
| 541 |
+
f" {name_d2}:",
|
| 542 |
+
f" {scope_d2}.",
|
| 543 |
f"",
|
| 544 |
]
|
| 545 |
|
| 546 |
if caveats:
|
| 547 |
report_lines += [
|
| 548 |
f"{'β' * W}",
|
| 549 |
+
f" β NOTE",
|
| 550 |
f"{'β' * W}",
|
| 551 |
]
|
| 552 |
for c in caveats:
|
|
|
|
| 560 |
f" {verdict}",
|
| 561 |
f"",
|
| 562 |
f"{'β' * W}",
|
| 563 |
+
f" Scores are computed in the full {MODEL_DIM}-dimensional semantic",
|
| 564 |
+
f" space of {MODEL_NAME}. The 3D map above is a",
|
| 565 |
+
f" dimensionality-reduced view for visual orientation only.",
|
| 566 |
f"{'β' * W}",
|
| 567 |
]
|
| 568 |
report = "\n".join(report_lines)
|