# Provenance: uploaded by thomascerniglia ("Upload 8 files", commit d0326ea, verified).
from __future__ import annotations
from typing import Any, List, Mapping, Tuple
from .features import ENDINGS_PLAIN, PARTICLES
def _feature_table(feature_dict: Mapping[str, Any], key: str) -> Mapping[str, Any]:
    """Return the sub-mapping stored under *key*, or an empty dict if missing or falsy."""
    return feature_dict.get(key, {}) or {}


def _count(table: Mapping[str, Any], key: str) -> int:
    """Return ``table[key]`` coerced to int; a missing or falsy value counts as 0."""
    return int(table.get(key, 0) or 0)


def _prefixed_counts(table: Mapping[str, Any], keys: Tuple[str, ...], prefix: str = "") -> str:
    """Render ``<prefix><key>=<count>`` for each key, comma-separated."""
    return ", ".join(f"{prefix}{key}={_count(table, key)}" for key in keys)


def _labelled_counts(table: Mapping[str, Any], pairs: Tuple[Tuple[str, str], ...]) -> str:
    """Render ``<label>=<count>`` for each ``(label, key)`` pair, comma-separated."""
    return ", ".join(f"{label}={_count(table, key)}" for label, key in pairs)


def explain_results(feature_dict: Mapping[str, Any], scores: Mapping[str, float]) -> str:
    """Generate a human-readable explanation of the classification.

    Args:
        feature_dict: Extracted features. Sub-mappings of counts are read from
            keys such as ``"particles"``, ``"endings"``, ``"infinitives"`` and
            ``"orthography"``; ``"token_count"``, ``"diagnostics"`` and
            ``"_contributions"`` are read as well. Every key is optional, and
            missing or falsy values are treated as empty / zero.
        scores: Mapping of dialect name to score. The highest-scoring entry is
            reported as the prediction and its value is printed as a percentage.

    Returns:
        A multi-line report: the prediction, optional warnings, the observed
        feature counts, the (up to eight) largest-magnitude contributions for
        the predicted dialect when available, and a closing note. If *scores*
        is empty, the single line ``"No scores were produced."`` is returned.
    """
    if not scores:
        return "No scores were produced."

    best_dialect = max(scores.items(), key=lambda kv: kv[1])[0]
    best_pct = float(scores[best_dialect])
    token_count = _count(feature_dict, "token_count")

    particles = _feature_table(feature_dict, "particles")
    endings = _feature_table(feature_dict, "endings")
    infinitives = _feature_table(feature_dict, "infinitives")
    dative_plural = _feature_table(feature_dict, "dative_plural_endings")
    epic_endings = _feature_table(feature_dict, "epic_endings")
    epic_particles = _feature_table(feature_dict, "epic_particles")
    epic_words = _feature_table(feature_dict, "epic_words")
    prepositions = _feature_table(feature_dict, "prepositions")
    koine_words = _feature_table(feature_dict, "koine_words")
    lexical_cues = _feature_table(feature_dict, "lexical_cues")
    doric_cues = _feature_table(feature_dict, "doric_cues")
    poetic_morph = _feature_table(feature_dict, "poetic_morph")
    patterns = _feature_table(feature_dict, "patterns")
    orth = _feature_table(feature_dict, "orthography")

    diagnostics = _feature_table(feature_dict, "diagnostics")
    greek_ratio = diagnostics.get("greek_ratio", None)
    top_gap_pct = diagnostics.get("top_gap_pct", None)

    # Guard with ``or {}`` like every other table: an entry that is present
    # but None must not crash the ``.items()`` call below.
    contrib = _feature_table(feature_dict, "_contributions").get(best_dialect) or {}
    # Keep only the eight contributions with the largest absolute value.
    top_contrib: List[Tuple[str, float]] = sorted(
        contrib.items(), key=lambda kv: abs(kv[1]), reverse=True
    )[:8]

    particle_bits = _prefixed_counts(particles, tuple(PARTICLES))
    ending_bits = _prefixed_counts(endings, (*ENDINGS_PLAIN, "ᾳ"), "-")
    orth_bits = (
        f"alpha_endings={_count(orth, 'alpha_endings')}, "
        f"eta_endings={_count(orth, 'eta_endings')}"
    )

    lines: List[str] = []
    lines.append(f"Prediction: {best_dialect} (confidence {best_pct:.1f}%)")
    lines.append(f"Tokens analyzed: {token_count}")

    # Diagnostics are optional; only report them when they are numeric.
    if isinstance(greek_ratio, (int, float)):
        lines.append(f"Greek-script ratio (letters): {float(greek_ratio):.2f}")
        if float(greek_ratio) < 0.30:
            lines.append("Warning: input contains little/no Greek; classification is low-evidence.")
    if token_count < 20:
        lines.append("Warning: very short passage; confidence may be unreliable.")
    if isinstance(top_gap_pct, (int, float)) and float(top_gap_pct) < 10.0:
        lines.append("Warning: scores are clustered; dialect signal is weak.")

    lines.append("")
    lines.append("Observed feature counts:")
    lines.append(f" Particles: {particle_bits}")
    lines.append(f" Endings: {ending_bits}")
    lines.append(
        " Infinitives: "
        + _labelled_counts(
            infinitives,
            (("-ειν", "ειν"), ("-μεναι", "μεναι"), ("-μεν", "μεν")),
        )
    )
    # NOTE(review): these keys end in final sigma (ς) while other tables in
    # this function use medial sigma (σ) for word-final position — confirm
    # against the feature extractor's key spelling.
    lines.append(
        " Dative plural endings: "
        + _prefixed_counts(dative_plural, ("οισι", "ηισι", "αισι", "οις", "αις"), "-")
    )
    lines.append(
        " Epic: "
        + ", ".join(
            [
                _prefixed_counts(epic_endings, ("οιο", "εσσι", "φι", "ηοσ", "αδεω", "ιδεω"), "-"),
                _prefixed_counts(epic_particles, ("κε", "κεν", "αρ", "μιν")),
                _prefixed_counts(epic_words, ("εννεπε", "αειδε", "μουσα", "μηνιν", "θεα")),
            ]
        )
    )
    lines.append(
        f" Patterns: ττ={_count(patterns, 'tt')}, σσ={_count(patterns, 'ss')}"
    )
    lines.append(
        " Prepositions: "
        + _labelled_counts(prepositions, (("εἰς", "εισ"), ("ἐς", "εσ")))
    )
    lines.append(
        " Koine function words: "
        + _labelled_counts(
            koine_words,
            (("ἵνα", "ινα"), ("ὅτι", "οτι"), ("καθώς", "καθωσ"), ("ἐγένετο", "εγενετο")),
        )
    )
    lines.append(
        " Lexicalized cues: "
        + _labelled_counts(
            lexical_cues,
            (("TT-stems", "attic_tt"), ("SS-stems", "ionic_ss")),
        )
    )
    lines.append(f" Doric cue: ἁ-initial={_count(doric_cues, 'ha_initial')}")
    # The poetic-morphology row is only shown when that table is non-empty.
    if poetic_morph:
        lines.append(
            " Poetic morph: "
            + _labelled_counts(
                poetic_morph,
                (
                    ("-μες(1pl)", "verb_1pl_mes"),
                    ("ἄμμι", "aeolic_ammi"),
                    ("ὔμμι", "aeolic_ummi"),
                ),
            )
        )
    lines.append(f" Orthography: {orth_bits}")

    if top_contrib:
        lines.append("")
        lines.append(f"Top contributing rules for {best_dialect}:")
        for name, delta in top_contrib:
            lines.append(f" {name}: {delta:+.3f}")

    lines.append("")
    lines.append("Note: weights are MVP placeholders; edit dialect_analysis/scoring.py to refine rules.")
    return "\n".join(lines)