from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """Describe one leaderboard column and the result entry that feeds it.

    Attributes:
        benchmark: Task key looked up in the results JSON file.
        metric: Metric key looked up in the results JSON file.
        metric_type: Label of the statistic the column carries. The values
            used in this file are ``CPS``, ``average_accuracy``,
            ``std_accuracy``, ``best_prompt`` and ``prompt_id``; how
            consumers interpret them is not visible here (TODO confirm).
        col_name: Name displayed for the column in the leaderboard.
    """

    benchmark: str
    metric: str
    metric_type: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Leaderboard columns, one enum member per (benchmark, statistic) pair.

    Every member wraps a ``Task(benchmark, metric, metric_type, col_name)``.
    Each active benchmark family contributes five consecutive members whose
    ``metric_type`` is, in order: ``CPS``, ``average_accuracy``,
    ``std_accuracy``, ``best_prompt`` and ``prompt_id``.
    """

    # Task arguments, in order: task_key in the json file, metric_key in the
    # json file, metric type, name to display in the leaderboard.

    # Disabled task set, kept for reference. Its member names (task1..task50)
    # overlap with the active task1..task30 below, so re-enabling any of these
    # requires renumbering first.
    # task1 = Task("text-entailment_1", "acc", "CPS", "TE")
    # task2 = Task("text-entailment_2", "acc", "average_accuracy", "TE Prompt Average")
    # task3 = Task("text-entailment_3", "acc", "std_accuracy", "TE Prompt Std")
    # task4 = Task("text-entailment_4", "acc", "best_prompt", "TE Best Prompt")
    # task5 = Task("text-entailment_5", "acc", "prompt_id", "TE Best Prompt Id")
    # task6 = Task("sentiment-analysis_1", "acc", "CPS", "SA")
    # task7 = Task("sentiment-analysis_2", "acc", "average_accuracy", "SA Prompt Average")
    # task8 = Task("sentiment-analysis_3", "acc", "std_accuracy", "SA STD Accuracy")
    # task9 = Task("sentiment-analysis_4", "acc", "best_prompt", "SA Best Prompt")
    # task10 = Task("sentiment-analysis_5", "acc", "prompt_id", "SA Best Prompt Id")
    # task11 = Task("hate-speech-detection_1", "acc", "CPS", "HS")
    # task12 = Task("hate-speech-detection_2", "acc", "average_accuracy", "HS Prompt Average")
    # task13 = Task("hate-speech-detection_3", "acc", "std_accuracy", "HS Prompt Std")
    # task14 = Task("hate-speech-detection_4", "acc", "best_prompt", "HS Best Prompt")
    # task15 = Task("hate-speech-detection_5", "acc", "prompt_id", "HS Best Prompt Id")
    # task16 = Task("admission-test_1", "acc", "CPS", "AT")
    # task17 = Task("admission-test_2", "acc", "average_accuracy", "AT Prompt Average")
    # task18 = Task("admission-test_3", "acc", "std_accuracy", "AT Prompt Std")
    # task19 = Task("admission-test_4", "acc", "best_prompt", "AT Best Prompt")
    # task20 = Task("admission-test_5", "acc", "prompt_id", "AT Best Prompt Id")
    # task21 = Task("word-in-context_1", "acc", "CPS", "WIC")
    # task22 = Task("word-in-context_2", "acc", "average_accuracy", "WIC Prompt Average")
    # task23 = Task("word-in-context_3", "acc", "std_accuracy", "WIC Prompt Std")
    # task24 = Task("word-in-context_4", "acc", "best_prompt", "WIC Best Prompt")
    # task25 = Task("word-in-context_5", "acc", "prompt_id", "WIC Best Prompt Id")
    # task26 = Task("faq_1", "acc", "CPS", "FAQ")
    # task27 = Task("faq_2", "acc", "average_accuracy", "FAQ Prompt Average")
    # task28 = Task("faq_3", "acc", "std_accuracy", "FAQ Prompt Std")
    # task29 = Task("faq_4", "acc", "best_prompt", "FAQ Best Prompt")
    # task30 = Task("faq_5", "acc", "prompt_id", "FAQ Best Prompt Id")
    # task31 = Task("lexical-substitution_1", "acc", "CPS", "LS")
    # task32 = Task("lexical-substitution_2", "acc", "average_accuracy", "LS Prompt Average")
    # task33 = Task("lexical-substitution_3", "acc", "std_accuracy", "LS Prompt Std")
    # task34 = Task("lexical-substitution_4", "acc", "best_prompt", "LS Best Prompt")
    # task35 = Task("lexical-substitution_5", "acc", "prompt_id", "LS Best Prompt Id")
    # task36 = Task("summarization-fanpage_1", "acc", "CPS", "SU")
    # task37 = Task("summarization-fanpage_2", "acc", "average_accuracy", "SU Prompt Average")
    # task38 = Task("summarization-fanpage_3", "acc", "std_accuracy", "SU Prompt Std")
    # task39 = Task("summarization-fanpage_4", "acc", "best_prompt", "SU Best Prompt")
    # task40 = Task("summarization-fanpage_5", "acc", "prompt_id", "SU Best Prompt Id")
    # task41 = Task("evalita NER_1", "acc", "CPS", "NER")
    # task42 = Task("evalita NER_2", "acc", "average_accuracy", "NER Prompt Average")
    # task43 = Task("evalita NER_3", "acc", "std_accuracy", "NER Prompt Std")
    # task44 = Task("evalita NER_4", "acc", "best_prompt", "NER Best Prompt")
    # task45 = Task("evalita NER_5", "acc", "prompt_id", "NER Best Prompt Id")
    # task46 = Task("relation-extraction_1", "acc", "CPS", "REL")
    # task47 = Task("relation-extraction_2", "acc", "average_accuracy", "REL Prompt Average")
    # task48 = Task("relation-extraction_5", "acc", "std_accuracy", "REL Prompt Std")
    # task49 = Task("relation-extraction_3", "acc", "best_prompt", "REL Best Prompt")
    # task50 = Task("relation-extraction_4", "acc", "prompt_id", "REL Best Prompt Id")

    # --- Active tasks -------------------------------------------------------

    # REL-E3C
    # NOTE(review): unlike every other family, RE pairs suffix _5 with
    # std_accuracy, _3 with best_prompt and _4 with prompt_id (the disabled
    # relation-extraction entries above do the same). Confirm this matches the
    # keys in the results JSON before "normalising" it.
    task1 = Task("RE_1", "acc", "CPS", "REL-E3C")
    task2 = Task("RE_2", "acc", "average_accuracy", "REL-E3C Prompt Average")
    task3 = Task("RE_5", "acc", "std_accuracy", "REL-E3C Prompt Std")
    task4 = Task("RE_3", "acc", "best_prompt", "REL-E3C Best Prompt")
    task5 = Task("RE_4", "acc", "prompt_id", "REL-E3C Best Prompt Id")

    # NER-E3C
    task6 = Task("NER_1", "acc", "CPS", "NER-E3C")
    task7 = Task("NER_2", "acc", "average_accuracy", "NER-E3C Prompt Average")
    task8 = Task("NER_3", "acc", "std_accuracy", "NER-E3C Prompt Std")
    task9 = Task("NER_4", "acc", "best_prompt", "NER-E3C Best Prompt")
    task10 = Task("NER_5", "acc", "prompt_id", "NER-E3C Best Prompt Id")

    # CRF-RML
    task11 = Task("RML-CRF_1", "acc", "CPS", "CRF-RML")
    task12 = Task("RML-CRF_2", "acc", "average_accuracy", "CRF-RML Prompt Average")
    task13 = Task("RML-CRF_3", "acc", "std_accuracy", "CRF-RML Prompt Std")
    task14 = Task("RML-CRF_4", "acc", "best_prompt", "CRF-RML Best Prompt")
    task15 = Task("RML-CRF_5", "acc", "prompt_id", "CRF-RML Best Prompt Id")

    # CRF-DIA
    task16 = Task("DIA-CRF_1", "acc", "CPS", "CRF-DIA")
    task17 = Task("DIA-CRF_2", "acc", "average_accuracy", "CRF-DIA Prompt Average")
    task18 = Task("DIA-CRF_3", "acc", "std_accuracy", "CRF-DIA Prompt Std")
    task19 = Task("DIA-CRF_4", "acc", "best_prompt", "CRF-DIA Best Prompt")
    task20 = Task("DIA-CRF_5", "acc", "prompt_id", "CRF-DIA Best Prompt Id")

    # CRF-HIS
    task21 = Task("HIS-CRF_1", "acc", "CPS", "CRF-HIS")
    task22 = Task("HIS-CRF_2", "acc", "average_accuracy", "CRF-HIS Prompt Average")
    task23 = Task("HIS-CRF_3", "acc", "std_accuracy", "CRF-HIS Prompt Std")
    task24 = Task("HIS-CRF_4", "acc", "best_prompt", "CRF-HIS Best Prompt")
    task25 = Task("HIS-CRF_5", "acc", "prompt_id", "CRF-HIS Best Prompt Id")

    # NER-PHA
    task26 = Task("NER-PHARMAER_1", "acc", "CPS", "NER-PHA")
    task27 = Task("NER-PHARMAER_2", "acc", "average_accuracy", "NER-PHA Prompt Average")
    task28 = Task("NER-PHARMAER_3", "acc", "std_accuracy", "NER-PHA Prompt Std")
    task29 = Task("NER-PHARMAER_4", "acc", "best_prompt", "NER-PHA Best Prompt")
    task30 = Task("NER-PHARMAER_5", "acc", "prompt_id", "NER-PHA Best Prompt Id")

    # Obsolete definitions, previously parked in a bare triple-quoted string.
    # These calls pass only three arguments, which the four-field Task above
    # rejects with a TypeError, so they cannot be re-enabled as written.
    # task0 = Task("TextualEntailment", "acc", "Textual Entailment")
    # task1 = Task("TextualEntailment_best", "acc", "TextualEntailment Best")
    # task2 = Task("Sentiment Analysis", "acc", "Sentiment Analysis")
    # task3 = Task("Sentiment Analysis_best", "acc", "Sentiment Analysis_best")
    # task4 = Task("Hate Speech", "acc", "Hate Speech")
    # task5 = Task("Hate Speech_best", "acc", "Hate Speech_best")
    # task6 = Task("Admission Test", "acc", "Admission Test")
    # task7 = Task("Admission Test_best", "acc", "Admission Test_best")
    # task8 = Task("Word in Context", "acc", "Word in Context")
    # task9 = Task("Word in Context_best", "acc", "Word in Context_best")
    # task10 = Task("FAQ", "acc", "FAQ")
    # task11 = Task("FAQ_best", "acc", "FAQ_best")
    # task12 = Task("Lexical Substitution", "acc", "Lexical Substitution")
    # task13 = Task("Lexical Substitution_best", "acc", "Lexical Substitution_best")
    # task14 = Task("Summarization", "acc", "Summarization")
    # task15 = Task("Summarization_best", "acc", "Summarization_best")
    # task16 = Task("NER", "acc", "NER")
    # task17 = Task("NER_best", "acc", "NER_best")
    # task18 = Task("REL", "acc", "REL")
    # task19 = Task("REL_best", "acc", "REL_best")


# Your leaderboard name TITLE = """