Spaces:

thomascerniglia
/

DialectAnalysis

Sleeping

App Files Files Community

DialectAnalysis / dialect_analysis /explanation.py

thomascerniglia

Upload 8 files

d0326ea verified about 1 month ago

raw

history blame contribute delete

6.39 kB

	from __future__ import annotations

	from typing import Any, List, Mapping, Tuple

	from .features import ENDINGS_PLAIN, PARTICLES


	def _section(feature_dict: Mapping[str, Any], name: str) -> Mapping[str, Any]:
	    """Return the sub-mapping stored under ``name``; missing or falsy becomes ``{}``."""
	    return feature_dict.get(name, {}) or {}


	def _count(table: Mapping[str, Any], key: str) -> int:
	    """Return ``table[key]`` coerced to ``int``; a missing or falsy entry counts as 0."""
	    return int(table.get(key, 0) or 0)


	def _counts(table: Mapping[str, Any], keys: Any, prefix: str = "") -> str:
	    """Render ``<prefix><key>=<count>`` for each key, comma-separated, in the given order."""
	    return ", ".join(f"{prefix}{key}={_count(table, key)}" for key in keys)


	def _labelled(table: Mapping[str, Any], pairs: Any) -> str:
	    """Render ``<label>=<count>`` for each ``(label, key)`` pair, comma-separated."""
	    return ", ".join(f"{label}={_count(table, key)}" for label, key in pairs)


	def explain_results(feature_dict: Mapping[str, Any], scores: Mapping[str, float]) -> str:
	    """Generate a human-readable explanation of the classification.

	    Args:
	        feature_dict: Extracted features. The keys read here are
	            ``token_count``, the count sub-mappings (``particles``,
	            ``endings``, ``infinitives``, ``dative_plural_endings``,
	            ``epic_endings``, ``epic_particles``, ``epic_words``,
	            ``prepositions``, ``koine_words``, ``lexical_cues``,
	            ``doric_cues``, ``poetic_morph``, ``patterns``, ``orthography``),
	            ``diagnostics`` (``greek_ratio``, ``top_gap_pct``) and
	            ``_contributions`` (dialect -> {rule name: delta}). Every key is
	            optional; a missing or falsy value is treated as empty / zero.
	        scores: Mapping of dialect name to score. The highest-scoring dialect
	            is reported, and its score is printed as a percentage.

	    Returns:
	        A newline-joined report, or ``"No scores were produced."`` when
	        ``scores`` is empty.
	    """
	    if not scores:
	        return "No scores were produced."

	    # On ties ``max`` keeps the first maximal key in iteration order.
	    best_dialect = max(scores, key=lambda name: scores[name])
	    best_pct = float(scores[best_dialect])

	    token_count = _count(feature_dict, "token_count")
	    particles = _section(feature_dict, "particles")
	    endings = _section(feature_dict, "endings")
	    infinitives = _section(feature_dict, "infinitives")
	    dative_plural = _section(feature_dict, "dative_plural_endings")
	    epic_endings = _section(feature_dict, "epic_endings")
	    epic_particles = _section(feature_dict, "epic_particles")
	    epic_words = _section(feature_dict, "epic_words")
	    prepositions = _section(feature_dict, "prepositions")
	    koine_words = _section(feature_dict, "koine_words")
	    lexical_cues = _section(feature_dict, "lexical_cues")
	    doric_cues = _section(feature_dict, "doric_cues")
	    poetic_morph = _section(feature_dict, "poetic_morph")
	    patterns = _section(feature_dict, "patterns")
	    orth = _section(feature_dict, "orthography")
	    diagnostics = _section(feature_dict, "diagnostics")
	    greek_ratio = diagnostics.get("greek_ratio", None)
	    top_gap_pct = diagnostics.get("top_gap_pct", None)

	    # ``or {}`` (rather than a ``.get`` default) also covers an entry that is
	    # present but None, which would otherwise crash on ``.items()`` below.
	    contrib: Mapping[str, float] = _section(feature_dict, "_contributions").get(best_dialect) or {}
	    # Keep the eight rules with the largest absolute effect, strongest first.
	    top_contrib: List[Tuple[str, float]] = sorted(
	        contrib.items(), key=lambda kv: abs(kv[1]), reverse=True
	    )[:8]

	    lines: List[str] = [
	        f"Prediction: {best_dialect} (confidence {best_pct:.1f}%)",
	        f"Tokens analyzed: {token_count}",
	    ]

	    # Reliability warnings.
	    if isinstance(greek_ratio, (int, float)):
	        lines.append(f"Greek-script ratio (letters): {float(greek_ratio):.2f}")
	        if float(greek_ratio) < 0.30:
	            lines.append("Warning: input contains little/no Greek; classification is low-evidence.")
	    if token_count < 20:
	        lines.append("Warning: very short passage; confidence may be unreliable.")
	    if isinstance(top_gap_pct, (int, float)) and float(top_gap_pct) < 10.0:
	        lines.append("Warning: scores are clustered; dialect signal is weak.")

	    # NOTE(review): the lookup keys below are unaccented and use medial sigma
	    # (e.g. 'εισ', 'καθωσ'); presumably they match the feature extractor's
	    # normalization. Verify against features.py.
	    lines.append("")
	    lines.append("Observed feature counts:")
	    lines.append(f" Particles: {_counts(particles, PARTICLES)}")
	    lines.append(f" Endings: {_counts(endings, (*ENDINGS_PLAIN, 'ᾳ'), '-')}")
	    lines.append(" Infinitives: " + _counts(infinitives, ("ειν", "μεναι", "μεν"), "-"))
	    lines.append(
	        " Dative plural endings: "
	        + _counts(dative_plural, ("οισι", "ηισι", "αισι", "οις", "αις"), "-")
	    )
	    lines.append(
	        " Epic: "
	        + ", ".join(
	            [
	                _counts(epic_endings, ("οιο", "εσσι", "φι", "ηοσ", "αδεω", "ιδεω"), "-"),
	                _counts(epic_particles, ("κε", "κεν", "αρ", "μιν")),
	                _counts(epic_words, ("εννεπε", "αειδε", "μουσα", "μηνιν", "θεα")),
	            ]
	        )
	    )
	    lines.append(" Patterns: " + _labelled(patterns, (("ττ", "tt"), ("σσ", "ss"))))
	    lines.append(" Prepositions: " + _labelled(prepositions, (("εἰς", "εισ"), ("ἐς", "εσ"))))
	    lines.append(
	        " Koine function words: "
	        + _labelled(
	            koine_words,
	            (("ἵνα", "ινα"), ("ὅτι", "οτι"), ("καθώς", "καθωσ"), ("ἐγένετο", "εγενετο")),
	        )
	    )
	    lines.append(
	        " Lexicalized cues: "
	        + _labelled(lexical_cues, (("TT-stems", "attic_tt"), ("SS-stems", "ionic_ss")))
	    )
	    lines.append(f" Doric cue: ἁ-initial={_count(doric_cues, 'ha_initial')}")
	    # The poetic-morphology line is printed only when that section is non-empty.
	    if poetic_morph:
	        lines.append(
	            " Poetic morph: "
	            + _labelled(
	                poetic_morph,
	                (
	                    ("-μες(1pl)", "verb_1pl_mes"),
	                    ("ἄμμι", "aeolic_ammi"),
	                    ("ὔμμι", "aeolic_ummi"),
	                ),
	            )
	        )
	    lines.append(
	        " Orthography: "
	        + _labelled(orth, (("alpha_endings", "alpha_endings"), ("eta_endings", "eta_endings")))
	    )

	    if top_contrib:
	        lines.append("")
	        lines.append(f"Top contributing rules for {best_dialect}:")
	        lines.extend(f" {name}: {delta:+.3f}" for name, delta in top_contrib)

	    lines.append("")
	    lines.append("Note: weights are MVP placeholders; edit dialect_analysis/scoring.py to refine rules.")
	    return "\n".join(lines)