Spaces:

saeedfarzi
/

eCREAM_LLM_Leaderboard_Beta

Sleeping

Sfarzi committed on Nov 14, 2025

Commit

5bbb459

1 Parent(s): e6c5ca5

Initial clone with modifications

Files changed (3) hide show

src/__pycache__/tasks.cpython-310.pyc CHANGED Viewed

Binary files a/src/__pycache__/tasks.cpython-310.pyc and b/src/__pycache__/tasks.cpython-310.pyc differ

src/about.py CHANGED Viewed

@@ -139,8 +139,8 @@ TITLE = """<h1 align="center" id="space-title">🚀 ECREAM-LLM Leaderboard 🚀<
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 <br><br><b>The eCream-LLM leaderboard </b>, developed within <a href='https://ecreamproject.eu/'> the eCream Project </a> (enabling Clinical Research in Emergency and Acute care Medicine), is designed to evaluate Large Language Models (LLMs) on several tasks pertaining to the medical domain. Its distinguishing features are:<b> <br> (i) all tasks are implemented for six languages including English, Italian, Slovak, Slovenian, Polish and Greek; <br> (ii) the leaderboard includes generative tasks, allowing for a more natural interaction with LLMs; <br> (iii) all tasks are evaluated against multiple prompts, this way mitigating the model sensitivity to specific prompts and allowing a fairer evaluation.</b>
-<br><br>**<small>Generative tasks:</small>** <small> 🏷️NER (Named Entity Recognition), 🔗REL (Relation Extraction), 😃RML(CRF RML) </small>
-<br>**<small>Multiple-choice task:</small>** <small>   🏥DIA (CRF Diagnosis), 📝HIS (CRF History)  </small>
 """

 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 <br><br><b>The eCream-LLM leaderboard </b>, developed within <a href='https://ecreamproject.eu/'> the eCream Project </a> (enabling Clinical Research in Emergency and Acute care Medicine), is designed to evaluate Large Language Models (LLMs) on several tasks pertaining to the medical domain. Its distinguishing features are:<b> <br> (i) all tasks are implemented for six languages including English, Italian, Slovak, Slovenian, Polish and Greek; <br> (ii) the leaderboard includes generative tasks, allowing for a more natural interaction with LLMs; <br> (iii) all tasks are evaluated against multiple prompts, this way mitigating the model sensitivity to specific prompts and allowing a fairer evaluation.</b>
+<br><br>**<small>Generative tasks:</small>** <small> 🏷️NER-E3C (Named Entity Recognition - E3C), 🔗REL-E3C (Relation Extraction - E3C), 😃CRF-RML (CRF RML), NER-PHA (Named Entity Recognition - PharmaER.IT)   </small>
+<br>**<small>Multiple-choice task:</small>** <small>   🏥CRF-DIA (CRF Diagnosis), 📝CRF-HIS (CRF History)  </small>
 """

src/tasks.py CHANGED Viewed

@@ -254,15 +254,15 @@ NER_PHARMAER_DESCRIPTION = """### Named Entity Recognition over  (NER)  over Pha
 # Create a dictionary to map task names to their descriptions
 TASK_DESCRIPTIONS = {
-    "RML-CRF": RML_DESCRIPTION,
-    "DIA-CRF": DIA_DESCRIPTION,
-    "HIS-CRF": HIS_DESCRIPTION,
     "AT": AT_DESCRIPTION,
     "WIC": WIC_DESCRIPTION,
     "FAQ": FAQ_DESCRIPTION,
     "LS": LS_DESCRIPTION,
     "SU": SU_DESCRIPTION,
-    "NER": NER_DESCRIPTION,
-    "REL": REL_DESCRIPTION,
-    "NER-PHARMAER": NER_PHARMAER_DESCRIPTION,
 }

 # Create a dictionary to map task names to their descriptions
 TASK_DESCRIPTIONS = {
+    "CRF-RML": RML_DESCRIPTION,
+    "CRF-DIA": DIA_DESCRIPTION,
+    "CRF-HIS": HIS_DESCRIPTION,
     "AT": AT_DESCRIPTION,
     "WIC": WIC_DESCRIPTION,
     "FAQ": FAQ_DESCRIPTION,
     "LS": LS_DESCRIPTION,
     "SU": SU_DESCRIPTION,
+    "NER-E3C": NER_DESCRIPTION,
+    "REL-E3C": REL_DESCRIPTION,
+    "NER-PHA": NER_PHARMAER_DESCRIPTION,
 }