# Spaces: Sleeping | File size: 6,387 Bytes | d0326ea
from __future__ import annotations
from typing import Any, List, Mapping, Tuple
from .features import ENDINGS_PLAIN, PARTICLES
def _count(table: Mapping[str, Any], key: str) -> int:
    """Return ``table[key]`` as an int, counting a missing or falsy entry as 0."""
    return int(table.get(key, 0) or 0)


def _section(source: Mapping[str, Any], name: str) -> Mapping[str, Any]:
    """Return the mapping stored under *name*, or an empty dict if absent or falsy."""
    return source.get(name, {}) or {}


def _tally(table: Mapping[str, Any], keys: Tuple[str, ...], prefix: str = "") -> List[str]:
    """Format one ``{prefix}{key}={count}`` entry per key, in the given order."""
    return [f"{prefix}{key}={_count(table, key)}" for key in keys]


def _labelled(table: Mapping[str, Any], pairs: Tuple[Tuple[str, str], ...]) -> List[str]:
    """Format one ``{label}={count}`` entry per ``(label, key)`` pair."""
    return [f"{label}={_count(table, key)}" for label, key in pairs]


def explain_results(feature_dict: Mapping[str, Any], scores: Mapping[str, float]) -> str:
    """Generate a human-readable explanation of the classification.

    Args:
        feature_dict: Extracted features. Each feature group read here
            (``particles``, ``endings``, ``infinitives``, ...) is looked up by
            name and treated as an empty mapping when missing or ``None``;
            individual counts that are missing or falsy are reported as 0.
            Optional ``diagnostics`` (``greek_ratio``, ``top_gap_pct``) and
            ``_contributions`` (per-dialect rule deltas) add warnings and a
            "top contributing rules" section.
        scores: Mapping of dialect name to score. The entry with the highest
            score is reported as the prediction, and its score is printed with
            a ``%`` suffix.

    Returns:
        A newline-joined report, or ``"No scores were produced."`` when
        *scores* is empty.
    """
    if not scores:
        return "No scores were produced."

    best_dialect, best_score = max(scores.items(), key=lambda kv: kv[1])
    best_pct = float(best_score)
    token_count = _count(feature_dict, "token_count")

    diagnostics = _section(feature_dict, "diagnostics")
    greek_ratio = diagnostics.get("greek_ratio", None)
    top_gap_pct = diagnostics.get("top_gap_pct", None)

    # The inner lookup is None-guarded like every other feature group, so a
    # dialect explicitly mapped to None no longer crashes on ``.items()``.
    contrib: Mapping[str, float] = _section(_section(feature_dict, "_contributions"), best_dialect)
    # Only the eight rules with the largest absolute effect are shown.
    top_contrib: List[Tuple[str, float]] = sorted(
        contrib.items(), key=lambda kv: abs(kv[1]), reverse=True
    )[:8]

    lines: List[str] = [
        f"Prediction: {best_dialect} (confidence {best_pct:.1f}%)",
        f"Tokens analyzed: {token_count}",
    ]

    # --- Low-evidence warnings -------------------------------------------
    if isinstance(greek_ratio, (int, float)):
        lines.append(f"Greek-script ratio (letters): {float(greek_ratio):.2f}")
        if float(greek_ratio) < 0.30:
            lines.append("Warning: input contains little/no Greek; classification is low-evidence.")
    if token_count < 20:
        lines.append("Warning: very short passage; confidence may be unreliable.")
    if isinstance(top_gap_pct, (int, float)) and float(top_gap_pct) < 10.0:
        lines.append("Warning: scores are clustered; dialect signal is weak.")

    # --- Raw feature counts ----------------------------------------------
    # NOTE(review): lookup keys are unaccented and use medial sigma even
    # word-finally ('εισ', 'καθωσ', 'ηοσ') — presumably matching the feature
    # extractor's normalisation; confirm against .features.
    particles = _section(feature_dict, "particles")
    endings = _section(feature_dict, "endings")
    infinitives = _section(feature_dict, "infinitives")
    dative_plural = _section(feature_dict, "dative_plural_endings")
    epic_endings = _section(feature_dict, "epic_endings")
    epic_particles = _section(feature_dict, "epic_particles")
    epic_words = _section(feature_dict, "epic_words")
    patterns = _section(feature_dict, "patterns")
    prepositions = _section(feature_dict, "prepositions")
    koine_words = _section(feature_dict, "koine_words")
    lexical_cues = _section(feature_dict, "lexical_cues")
    doric_cues = _section(feature_dict, "doric_cues")
    poetic_morph = _section(feature_dict, "poetic_morph")
    orth = _section(feature_dict, "orthography")

    epic_bits = (
        _tally(epic_endings, ("οιο", "εσσι", "φι", "ηοσ", "αδεω", "ιδεω"), "-")
        + _tally(epic_particles, ("κε", "κεν", "αρ", "μιν"))
        + _tally(epic_words, ("εννεπε", "αειδε", "μουσα", "μηνιν", "θεα"))
    )

    lines.append("")
    lines.append("Observed feature counts:")
    lines.append(" Particles: " + ", ".join(_tally(particles, tuple(PARTICLES))))
    lines.append(" Endings: " + ", ".join(_tally(endings, (*ENDINGS_PLAIN, "ᾳ"), "-")))
    lines.append(" Infinitives: " + ", ".join(_tally(infinitives, ("ειν", "μεναι", "μεν"), "-")))
    lines.append(
        " Dative plural endings: "
        + ", ".join(_tally(dative_plural, ("οισι", "ηισι", "αισι", "οις", "αις"), "-"))
    )
    lines.append(" Epic: " + ", ".join(epic_bits))
    lines.append(" Patterns: " + ", ".join(_labelled(patterns, (("ττ", "tt"), ("σσ", "ss")))))
    lines.append(
        " Prepositions: " + ", ".join(_labelled(prepositions, (("εἰς", "εισ"), ("ἐς", "εσ"))))
    )
    lines.append(
        " Koine function words: "
        + ", ".join(
            _labelled(
                koine_words,
                (("ἵνα", "ινα"), ("ὅτι", "οτι"), ("καθώς", "καθωσ"), ("ἐγένετο", "εγενετο")),
            )
        )
    )
    lines.append(
        " Lexicalized cues: "
        + ", ".join(_labelled(lexical_cues, (("TT-stems", "attic_tt"), ("SS-stems", "ionic_ss"))))
    )
    lines.append(f" Doric cue: ἁ-initial={_count(doric_cues, 'ha_initial')}")
    # The poetic-morphology line is emitted only when that feature group is present.
    if poetic_morph:
        lines.append(
            " Poetic morph: "
            + ", ".join(
                _labelled(
                    poetic_morph,
                    (
                        ("-μες(1pl)", "verb_1pl_mes"),
                        ("ἄμμι", "aeolic_ammi"),
                        ("ὔμμι", "aeolic_ummi"),
                    ),
                )
            )
        )
    lines.append(" Orthography: " + ", ".join(_tally(orth, ("alpha_endings", "eta_endings"))))

    # --- Rule contributions for the winning dialect -----------------------
    if top_contrib:
        lines.append("")
        lines.append(f"Top contributing rules for {best_dialect}:")
        lines.extend(f" {name}: {delta:+.3f}" for name, delta in top_contrib)

    lines.append("")
    lines.append("Note: weights are MVP placeholders; edit dialect_analysis/scoring.py to refine rules.")
    return "\n".join(lines)
|