Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from typing import Any, Dict, Mapping | |
| from .explanation import explain_results | |
| from .features import extract_features | |
| from .normalization import normalize_text | |
| from .scoring import DIALECTS, score_dialects | |
| from .tokenization import tokenize | |
def classify_text(text: str, *, strip_diacritics: bool = False) -> Dict[str, Any]:
    """Run the full dialect-classification pipeline on *text*.

    The pipeline is: normalize -> tokenize -> extract features -> score
    dialects, then package the winner with a confidence value, the
    strongest feature contributions, and a human-readable explanation.

    Args:
        text: Raw input text to classify.
        strip_diacritics: When True, diacritics are removed during
            normalization (keyword-only).

    Returns:
        A dict with keys ``dialect``, ``confidence`` (score scaled to
        0.0-1.0), ``scores``, ``top_features``, and ``explanation``.
    """
    normalized = normalize_text(text, strip_diacritics=strip_diacritics)
    features = extract_features(tokenize(normalized))
    scores = score_dialects(features)

    # Winner is the highest-scoring dialect; fall back to "Unknown"
    # with zero confidence when scoring produced nothing.
    if scores:
        dialect = max(scores, key=scores.__getitem__)
        confidence = float(scores[dialect]) / 100.0
    else:
        dialect = "Unknown"
        confidence = 0.0

    # Per-dialect feature contributions stashed by the scorer under a
    # private "_contributions" key; keep the six largest by magnitude.
    contributions = (features.get("_contributions", {}) or {}).get(dialect, {})
    ranked = sorted(contributions.items(), key=lambda item: abs(item[1]), reverse=True)
    top_features: Dict[str, Any] = {
        name: {"contribution": float(value)} for name, value in ranked[:6]
    }

    explanation = explain_results(features, scores)
    return {
        "dialect": dialect,
        "confidence": confidence,
        "scores": scores,
        "top_features": top_features,
        "explanation": explanation,
    }