Spaces:

thomascerniglia
/

DialectAnalysis

Sleeping

File size: 6,387 Bytes

d0326ea

from __future__ import annotations

from typing import Any, List, Mapping, Tuple

from .features import ENDINGS_PLAIN, PARTICLES


def _count_of(table: Mapping[str, Any], key: str) -> int:
    """Return ``table[key]`` as an int, treating a missing or falsy value as 0."""
    return int(table.get(key, 0) or 0)


def _format_counts(table: Mapping[str, Any], labelled_keys: List[Tuple[str, str]]) -> str:
    """Render ``label=count`` pairs, comma-separated, for ``(label, key)`` entries."""
    return ", ".join(f"{label}={_count_of(table, key)}" for label, key in labelled_keys)


def explain_results(feature_dict: Mapping[str, Any], scores: Mapping[str, float]) -> str:
    """Generate a human-readable explanation of the classification.

    Args:
        feature_dict: Extracted features. The sub-tables read here are
            ``particles``, ``endings``, ``infinitives``,
            ``dative_plural_endings``, ``epic_endings``, ``epic_particles``,
            ``epic_words``, ``prepositions``, ``koine_words``,
            ``lexical_cues``, ``doric_cues``, ``poetic_morph``, ``patterns``,
            ``orthography``, ``diagnostics`` and ``_contributions``, plus the
            scalar ``token_count``. Any of them may be missing, ``None`` or
            empty; they are then treated as empty / zero.
        scores: Score per dialect name. The highest-scoring dialect is
            reported, and its score is printed with a ``%`` suffix
            (presumably the scorer already emits percentages -- confirm
            against the scoring module).

    Returns:
        A newline-joined, multi-line report, or the fixed string
        ``"No scores were produced."`` when ``scores`` is empty.
    """
    if not scores:
        return "No scores were produced."

    best_dialect = max(scores.items(), key=lambda kv: kv[1])[0]
    best_pct = float(scores[best_dialect])

    def table(name: str) -> Mapping[str, Any]:
        # ``or {}`` also covers keys that are present but set to None.
        return feature_dict.get(name, {}) or {}

    token_count = _count_of(feature_dict, "token_count")
    diagnostics = table("diagnostics")
    greek_ratio = diagnostics.get("greek_ratio", None)
    top_gap_pct = diagnostics.get("top_gap_pct", None)

    # Guard the per-dialect entry the same way as every other table: a
    # present-but-None entry must not crash the ``.items()`` call below.
    contrib: Mapping[str, float] = table("_contributions").get(best_dialect, {}) or {}
    # Only the eight largest-magnitude rule contributions are shown.
    top_contrib: List[Tuple[str, float]] = sorted(
        contrib.items(), key=lambda kv: abs(kv[1]), reverse=True
    )[:8]

    lines: List[str] = [
        f"Prediction: {best_dialect} (confidence {best_pct:.1f}%)",
        f"Tokens analyzed: {token_count}",
    ]

    # Low-evidence warnings.
    if isinstance(greek_ratio, (int, float)):
        lines.append(f"Greek-script ratio (letters): {float(greek_ratio):.2f}")
        if float(greek_ratio) < 0.30:
            lines.append("Warning: input contains little/no Greek; classification is low-evidence.")
    if token_count < 20:
        lines.append("Warning: very short passage; confidence may be unreliable.")
    if isinstance(top_gap_pct, (int, float)) and float(top_gap_pct) < 10.0:
        lines.append("Warning: scores are clustered; dialect signal is weak.")

    # Raw feature counts. Lookup keys are the forms stored in the feature
    # tables; labels are the display forms shown to the reader.
    particle_bits = _format_counts(table("particles"), [(p, p) for p in PARTICLES])
    ending_bits = _format_counts(
        table("endings"), [(f"-{e}", e) for e in (*ENDINGS_PLAIN, "ᾳ")]
    )
    infinitive_bits = _format_counts(
        table("infinitives"), [(f"-{e}", e) for e in ("ειν", "μεναι", "μεν")]
    )
    dative_bits = _format_counts(
        table("dative_plural_endings"),
        [(f"-{e}", e) for e in ("οισι", "ηισι", "αισι", "οις", "αις")],
    )
    epic_bits = ", ".join(
        [
            _format_counts(
                table("epic_endings"),
                [(f"-{e}", e) for e in ("οιο", "εσσι", "φι", "ηοσ", "αδεω", "ιδεω")],
            ),
            _format_counts(
                table("epic_particles"), [(p, p) for p in ("κε", "κεν", "αρ", "μιν")]
            ),
            _format_counts(
                table("epic_words"),
                [(w, w) for w in ("εννεπε", "αειδε", "μουσα", "μηνιν", "θεα")],
            ),
        ]
    )
    pattern_bits = _format_counts(table("patterns"), [("ττ", "tt"), ("σσ", "ss")])
    preposition_bits = _format_counts(
        table("prepositions"), [("εἰς", "εισ"), ("ἐς", "εσ")]
    )
    koine_bits = _format_counts(
        table("koine_words"),
        [("ἵνα", "ινα"), ("ὅτι", "οτι"), ("καθώς", "καθωσ"), ("ἐγένετο", "εγενετο")],
    )
    lexical_bits = _format_counts(
        table("lexical_cues"), [("TT-stems", "attic_tt"), ("SS-stems", "ionic_ss")]
    )
    doric_bits = _format_counts(table("doric_cues"), [("ἁ-initial", "ha_initial")])
    orth_bits = _format_counts(
        table("orthography"),
        [("alpha_endings", "alpha_endings"), ("eta_endings", "eta_endings")],
    )

    lines.append("")
    lines.append("Observed feature counts:")
    lines.append(f"  Particles: {particle_bits}")
    lines.append(f"  Endings: {ending_bits}")
    lines.append(f"  Infinitives: {infinitive_bits}")
    lines.append(f"  Dative plural endings: {dative_bits}")
    lines.append(f"  Epic: {epic_bits}")
    lines.append(f"  Patterns: {pattern_bits}")
    lines.append(f"  Prepositions: {preposition_bits}")
    lines.append(f"  Koine function words: {koine_bits}")
    lines.append(f"  Lexicalized cues: {lexical_bits}")
    lines.append(f"  Doric cue: {doric_bits}")

    # The poetic-morphology line is emitted only when that table is non-empty.
    poetic_morph = table("poetic_morph")
    if poetic_morph:
        poetic_bits = _format_counts(
            poetic_morph,
            [
                ("-μες(1pl)", "verb_1pl_mes"),
                ("ἄμμι", "aeolic_ammi"),
                ("ὔμμι", "aeolic_ummi"),
            ],
        )
        lines.append(f"  Poetic morph: {poetic_bits}")
    lines.append(f"  Orthography: {orth_bits}")

    if top_contrib:
        lines.append("")
        lines.append(f"Top contributing rules for {best_dialect}:")
        for name, delta in top_contrib:
            lines.append(f"  {name}: {delta:+.3f}")

    lines.append("")
    lines.append("Note: weights are MVP placeholders; edit dialect_analysis/scoring.py to refine rules.")
    return "\n".join(lines)