TunisianEncodersArena

Runtime error

Refactor the code for better scalability, rename the TSAC task to sentiment analysis, and add the MADAR dataset for transliteration and normalization evaluation

bde1c71 2 months ago

raw

history blame

3.6 kB

	from dataclasses import dataclass
	from enum import Enum

	@dataclass
	class Task:
	    """One evaluated task: which dataset, which metric, and its display column.

	    Instances are used as the values of the ``Tasks`` enum members.
	    """

	    # Dataset name (e.g. a dataset id such as "fbougares/tsac").
	    benchmark: str
	    # Metric name (e.g. "accuracy").
	    metric: str
	    # Column name shown for this task (e.g. "Accuracy (TSAC) ⬆️").
	    col_name: str


	class Tasks(Enum):
	    """Registry of leaderboard tasks.

	    Each member's value is a ``Task(benchmark, metric, col_name)``. The arrow
	    at the end of ``col_name`` presumably marks whether higher (⬆️) or lower
	    (⬇️) scores are better — confirm against the display code.
	    """

	    sentiment_accuracy = Task("fbougares/tsac", "accuracy", "Accuracy (TSAC) ⬆️")
	    sentiment_f1 = Task("fbougares/tsac", "macro_f1", "Macro-F1 (TSAC) ⬆️")
	    ner_f1 = Task("arbml/tunisian_ner", "entity_f1", "Entity F1 (NER) ⬆️")
	    coverage = Task("arbml/Tunisian_Dialect_Corpus", "coverage", "Corpus Coverage % ⬆️")
	    arabizi_robustness = Task("tunis-ai/arabizi_eval", "arabizi_f1", "Arabizi Robustness F1 ⬆️")
	    code_switch = Task("tunis-ai/codeswitch_eval", "accuracy", "Code-Switch Accuracy ⬆️")
	    # The only task whose column is marked with a down arrow.
	    typo_robustness = Task("tunis-ai/typo_eval", "f1_drop", "Typo Robustness Drop % ⬇️")

	# Number of few-shot examples; change this to match your evaluation setup.
	NUM_FEWSHOT: int = 0
	# ---------------------------------------------------



	# Leaderboard name, as a centered HTML <h1> heading.
	TITLE = '<h1 align="center" id="space-title">Tunisian Dialect Leaderboard</h1>'

	# What does your leaderboard evaluate?
	# NOTE: the trailing backslash joins the two source lines into a single line of
	# text; the space before it keeps the two sentences from running together.
	INTRODUCTION_TEXT = """
	This leaderboard evaluates models and datasets focused on the Tunisian dialect of Arabic. \
	It highlights performance on key resources such as TSAC (fbougares/tsac) and the Tunisian Dialect Corpus (arbml/Tunisian_Dialect_Corpus).
	"""

	# Which evaluations are you running? How can people reproduce what you have?
	# Plain (non-f) string: the text has no placeholders, and keeping it a normal
	# literal means braces can be added to the Markdown later without escaping.
	LLM_BENCHMARKS_TEXT = """
	## How it works

	We evaluate models on:
	- TSAC ([fbougares/tsac](https://huggingface.co/datasets/fbougares/tsac)): Sentiment analysis in Tunisian dialect.
	- Tunisian Dialect Corpus ([arbml/Tunisian_Dialect_Corpus](https://huggingface.co/datasets/arbml/Tunisian_Dialect_Corpus)): Coverage and language understanding.

	## Reproducibility
	To reproduce our results, use the following commands (replace with your model):

	```python
	from transformers import AutoConfig, AutoModel, AutoTokenizer
	config = AutoConfig.from_pretrained("your model name", revision=revision)
	model = AutoModel.from_pretrained("your model name", revision=revision)
	tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
	```
	"""

	# Markdown checklist for model submitters.
	# The remote-code note names `trust_remote_code`, which is the actual keyword
	# accepted by the Transformers `from_pretrained` loaders.
	EVALUATION_QUEUE_TEXT = """
	## Some good practices before submitting a model

	### 1) Make sure your model is trained or evaluated on Tunisian dialect data (e.g., TSAC, Tunisian Dialect Corpus).

	### 2) Make sure you can load your model and tokenizer using AutoClasses:
	```python
	from transformers import AutoConfig, AutoModel, AutoTokenizer
	config = AutoConfig.from_pretrained("your model name", revision=revision)
	model = AutoModel.from_pretrained("your model name", revision=revision)
	tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
	```
	If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.

	Note: make sure your model is public!
	Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!

	### 3) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
	It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!

	### 4) Make sure your model has an open license!
	This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
	"""

	# Label for the citation snippet (presumably shown on the UI's citation button).
	CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
	# NOTE(review): the citation text is currently blank (whitespace only), so
	# there is nothing to copy yet; add the citation entry between the quotes.
	# A raw string is used so backslashes in the entry would stay literal.
	CITATION_BUTTON_TEXT = r"""
	"""