hamzabouajila's picture
refactor the code for better scalability and update tsac naming to sentiment analysis, adding madar dataset for transliteration and normalization eval
bde1c71
raw
history blame contribute delete
319 Bytes
# src/evaluators/transliteration/datasets.py
# Registry of datasets available for transliteration evaluation, keyed by a
# short dataset identifier. Each record names where the data lives ("path"),
# which split to read ("split"), and which columns hold the input and the
# reference output ("source_col" / "target_col").
# NOTE(review): "path" looks like a Hugging Face Hub dataset id — confirm
# against the loader that consumes this table.
TRANSLITERATION_DATASETS = {
    "madar-tun": dict(
        path="tunis-ai/MADAR-TUN",
        split="test",
        source_col="arabish",  # Latin-script side
        target_col="words",  # Arabic-script side
        description="MADAR-TUN: Arabizi ↔ Arabic script",
    ),
}