hamzabouajila's picture
Refactor the code for better scalability, update the TSAC naming to sentiment analysis, and add the MADAR dataset for transliteration and normalization evaluation
bde1c71
raw
history blame contribute delete
604 Bytes
from typing import Any, Dict

from pydantic import BaseModel, Field
def _default_datasets() -> Dict[str, Dict[str, Any]]:
    """Return a fresh copy of the built-in dataset registry.

    A new dict is built on every call so that each ``Config`` instance owns
    its own registry and mutating one instance cannot leak into another.
    """
    return {
        "tsac": {
            "path": "fbougares/tsac",
            "text_column": "sentence",
            "label_column": "target",
            "label_map": {0: 0, 1: 1},  # already binary
            # NOTE(review): presumably forwarded to the dataset loader; when
            # True it allows code shipped with the dataset repo to run
            # locally -- confirm the source is trusted.
            "trust_remote_code": True,
        },
        "tunisian_sentiment": {
            "path": "your-org/tunisian-sentiment",  # hypothetical
            "text_column": "text",
            "label_column": "label",
            # -1 marks the neutral class; presumably rows mapped to -1 are
            # filtered out by the consumer -- TODO confirm.
            "label_map": {"negative": 0, "positive": 1, "neutral": -1},  # drop neutral
            "trust_remote_code": False,
        },
        # Add more as they become available
    }


class Config(BaseModel):
    """Registry of evaluation datasets and their loading settings.

    The mapping is stored in the ``datasets`` field so it is reachable from
    an instance (``Config().datasets["tsac"]``) and can be overridden at
    construction time (``Config(datasets={...})``). ``Config()`` with no
    arguments yields the built-in registry.

    Each entry is keyed by a short dataset name and holds:

    * ``path`` -- dataset identifier (looks like a Hugging Face Hub repo id;
      verify against the loader).
    * ``text_column`` -- name of the column holding the input text.
    * ``label_column`` -- name of the column holding the raw label.
    * ``label_map`` -- mapping from raw label values to integer class ids.
    * ``trust_remote_code`` -- boolean flag stored alongside the entry.
    """

    # default_factory (rather than a literal default) guarantees a new dict
    # per instance.
    datasets: Dict[str, Dict[str, Any]] = Field(default_factory=_default_datasets)