"""Utility helpers for Urdu classification tasks (generative mode). Each process_docs function adds a ``target`` field with the expected Urdu label. process_results + macro_f1_agg compute macro-averaged F1. """ import os as _os, sys as _sys # noqa: E401 _sys.path.insert(0, _os.path.normpath(_os.path.join(_os.path.dirname(__file__), "..","..",))) from f1_utils import macro_f1_agg, process_results_f1 # noqa: F401 # ── Document pre-processing ───────────────────────────────────────── def process_fake_news_docs(dataset): """Map Real/Fake → حقیقی/جعلی.""" label_map = {"Real": "حقیقی", "Fake": "جعلی"} def _map(doc): doc["target"] = label_map.get(doc["label"], doc["label"]) return doc return dataset.map(_map) def process_emotion_docs(dataset): """Map English emotion labels → Urdu.""" label_map = { "sadness": "غم", "neutral": "غیر جانبدار", "fear": "خوف", "anger": "غصہ", "happiness": "خوشی", "surprise": "حیرت", "disgust": "نفرت", } def _map(doc): doc["target"] = label_map.get(doc["label"], doc["label"]) return doc return dataset.map(_map) # ── Result processing ──────────────────────────────────────────────── def process_results(doc, results): """Return (pred, gold) tuple for macro-F1 aggregation.""" return process_results_f1(doc, results)