File size: 344 Bytes
bde1c71
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
# src/evaluators/normalization/datasets.py
# Registry of the datasets available for normalization evaluation, keyed by a
# short dataset identifier. Each entry is a plain dict with:
#   path          - dataset location/identifier
#                   (presumably a dataset-hub repo id; verify against the loader)
#   split         - name of the split to evaluate on
#   arabish_col   - column holding the Arabizi source text
#   canonical_col - column holding the normalization target
#   description   - human-readable summary of the task
NORMALIZATION_DATASETS = {
    "madar-tun": dict(
        path="tunis-ai/MADAR-TUN",
        # NOTE(review): confirm "test" is the split this dataset actually ships.
        split="test",
        arabish_col="arabish",
        # Lemma column is the target; "words" is noted as a possible alternative.
        canonical_col="lem",
        description="MADAR-TUN: Arabizi → Lemma normalization",
    ),
}