File size: 6,387 Bytes
d0326ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from __future__ import annotations

from typing import Any, List, Mapping, Tuple

from .features import ENDINGS_PLAIN, PARTICLES


def explain_results(feature_dict: Mapping[str, Any], scores: Mapping[str, float]) -> str:
    """Build a plain-text report explaining a dialect classification.

    The report names the top-scoring dialect, adds warnings for low-evidence
    input, lists the observed feature counts and, when contribution data is
    present, the rules with the largest absolute effect on the winning score.

    Args:
        feature_dict: Extracted features. Every key is optional; a missing or
            ``None`` entry is reported as empty / zero. Keys read here are
            ``token_count``, the count tables (``particles``, ``endings``,
            ``infinitives``, ``dative_plural_endings``, ``epic_endings``,
            ``epic_particles``, ``epic_words``, ``patterns``, ``prepositions``,
            ``koine_words``, ``lexical_cues``, ``doric_cues``, ``poetic_morph``,
            ``orthography``), ``diagnostics`` (``greek_ratio``, ``top_gap_pct``)
            and ``_contributions`` (per-dialect mapping of rule name to delta).
        scores: Score per dialect; the highest one is printed as a percentage.

    Returns:
        The multi-line report, or ``"No scores were produced."`` when
        ``scores`` is empty.
    """
    if not scores:
        return "No scores were produced."

    best_dialect, best_score = max(scores.items(), key=lambda kv: kv[1])
    best_pct = float(best_score)

    def section(name: str) -> Mapping[str, Any]:
        # An absent key and an explicit None both yield an empty table.
        return feature_dict.get(name, {}) or {}

    def tally(table: Mapping[str, Any], labelled_keys: List[Tuple[str, str]]) -> str:
        # Render "label=count" pairs; a missing or falsy count is shown as 0.
        return ", ".join(f"{label}={int(table.get(key, 0) or 0)}" for label, key in labelled_keys)

    def suffixes(keys: Any) -> List[Tuple[str, str]]:
        # Word endings are displayed with a leading hyphen.
        return [(f"-{key}", key) for key in keys]

    token_count = int(feature_dict.get("token_count", 0) or 0)
    diagnostics = section("diagnostics")
    greek_ratio = diagnostics.get("greek_ratio")
    top_gap_pct = diagnostics.get("top_gap_pct")

    # `or {}` (rather than a .get default) also covers a dialect whose
    # contribution entry is present but None, matching the other tables.
    contrib = section("_contributions").get(best_dialect) or {}
    top_contrib: List[Tuple[str, float]] = sorted(
        contrib.items(), key=lambda kv: abs(kv[1]), reverse=True
    )[:8]

    lines: List[str] = [
        f"Prediction: {best_dialect} (confidence {best_pct:.1f}%)",
        f"Tokens analyzed: {token_count}",
    ]

    if isinstance(greek_ratio, (int, float)):
        ratio = float(greek_ratio)
        lines.append(f"Greek-script ratio (letters): {ratio:.2f}")
        if ratio < 0.30:
            lines.append("Warning: input contains little/no Greek; classification is low-evidence.")
    if token_count < 20:
        lines.append("Warning: very short passage; confidence may be unreliable.")
    if isinstance(top_gap_pct, (int, float)) and float(top_gap_pct) < 10.0:
        lines.append("Warning: scores are clustered; dialect signal is weak.")

    epic_bits = ", ".join(
        [
            tally(section("epic_endings"), suffixes(("οιο", "εσσι", "φι", "ηοσ", "αδεω", "ιδεω"))),
            tally(section("epic_particles"), [(p, p) for p in ("κε", "κεν", "αρ", "μιν")]),
            tally(section("epic_words"), [(w, w) for w in ("εννεπε", "αειδε", "μουσα", "μηνιν", "θεα")]),
        ]
    )

    lines += [
        "",
        "Observed feature counts:",
        "  Particles: " + tally(section("particles"), [(p, p) for p in PARTICLES]),
        "  Endings: " + tally(section("endings"), suffixes((*ENDINGS_PLAIN, "ᾳ"))),
        "  Infinitives: " + tally(section("infinitives"), suffixes(("ειν", "μεναι", "μεν"))),
        "  Dative plural endings: "
        + tally(section("dative_plural_endings"), suffixes(("οισι", "ηισι", "αισι", "οις", "αις"))),
        "  Epic: " + epic_bits,
        "  Patterns: " + tally(section("patterns"), [("ττ", "tt"), ("σσ", "ss")]),
        "  Prepositions: " + tally(section("prepositions"), [("εἰς", "εισ"), ("ἐς", "εσ")]),
        "  Koine function words: "
        + tally(
            section("koine_words"),
            [("ἵνα", "ινα"), ("ὅτι", "οτι"), ("καθώς", "καθωσ"), ("ἐγένετο", "εγενετο")],
        ),
        "  Lexicalized cues: "
        + tally(section("lexical_cues"), [("TT-stems", "attic_tt"), ("SS-stems", "ionic_ss")]),
        "  Doric cue: " + tally(section("doric_cues"), [("ἁ-initial", "ha_initial")]),
    ]

    # The poetic-morphology line is only shown when that table has entries.
    poetic_morph = section("poetic_morph")
    if poetic_morph:
        lines.append(
            "  Poetic morph: "
            + tally(
                poetic_morph,
                [("-μες(1pl)", "verb_1pl_mes"), ("ἄμμι", "aeolic_ammi"), ("ὔμμι", "aeolic_ummi")],
            )
        )

    lines.append(
        "  Orthography: "
        + tally(section("orthography"), [("alpha_endings", "alpha_endings"), ("eta_endings", "eta_endings")])
    )

    if top_contrib:
        lines.append("")
        lines.append(f"Top contributing rules for {best_dialect}:")
        lines.extend(f"  {name}: {delta:+.3f}" for name, delta in top_contrib)

    lines.append("")
    lines.append("Note: weights are MVP placeholders; edit dialect_analysis/scoring.py to refine rules.")
    return "\n".join(lines)