Kevin Xie committed on
Commit ·
d4af204
1
Parent(s): 1470b41
Update leaderboard (7/22)
Browse files
Updated leaderboard with 9 additional model evaluations.
- leaderboards/CoT_leaderboard.json +0 -0
- leaderboards/CoT_leaderboard_data.json +0 -0
- leaderboards/Few-Shot_leaderboard.json +0 -0
- leaderboards/Few-Shot_leaderboard_data.json +0 -0
- leaderboards/Zero-Shot_leaderboard.json +0 -0
- leaderboards/Zero-Shot_leaderboard_data.json +0 -0
- task_information.json +27 -11
- utils/data_loader.py +6 -7
leaderboards/CoT_leaderboard.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/CoT_leaderboard_data.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/Few-Shot_leaderboard.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/Few-Shot_leaderboard_data.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/Zero-Shot_leaderboard.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/Zero-Shot_leaderboard_data.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
task_information.json
CHANGED
|
@@ -40,7 +40,7 @@
|
|
| 40 |
"Clinical Stage": "Diagnosis and Prognosis"
|
| 41 |
},
|
| 42 |
"Brateca-Hospitalization": {
|
| 43 |
-
"Language": "Portuguese
|
| 44 |
"Task Type": "Text Classification",
|
| 45 |
"Clinical Context": "General",
|
| 46 |
"Data Access": "Regulated",
|
|
@@ -48,7 +48,7 @@
|
|
| 48 |
"Clinical Stage": "Diagnosis and Prognosis"
|
| 49 |
},
|
| 50 |
"Brateca-Mortality": {
|
| 51 |
-
"Language": "Portuguese
|
| 52 |
"Task Type": "Text Classification",
|
| 53 |
"Clinical Context": "General",
|
| 54 |
"Data Access": "Regulated",
|
|
@@ -116,7 +116,7 @@
|
|
| 116 |
"Task Type": "Event Extraction",
|
| 117 |
"Clinical Context": "General",
|
| 118 |
"Data Access": "Open Access",
|
| 119 |
-
"Applications": "Temporal
|
| 120 |
"Clinical Stage": "Initial Assessment"
|
| 121 |
},
|
| 122 |
"C-EMRS": {
|
|
@@ -356,7 +356,7 @@
|
|
| 356 |
"Task Type": "Named Entity Recognition",
|
| 357 |
"Clinical Context": "Pediatrics, Psychology",
|
| 358 |
"Data Access": "Open Access",
|
| 359 |
-
"Applications": "Temporal
|
| 360 |
"Clinical Stage": "Initial Assessment"
|
| 361 |
},
|
| 362 |
"n2c2 2018-ADE&medication": {
|
|
@@ -372,7 +372,7 @@
|
|
| 372 |
"Task Type": "Named Entity Recognition",
|
| 373 |
"Clinical Context": "Cardiology",
|
| 374 |
"Data Access": "Open Access",
|
| 375 |
-
"Applications": "Temporal
|
| 376 |
"Clinical Stage": "Initial Assessment"
|
| 377 |
},
|
| 378 |
"NorSynthClinical-RE": {
|
|
@@ -380,7 +380,7 @@
|
|
| 380 |
"Task Type": "Event Extraction",
|
| 381 |
"Clinical Context": "Cardiology",
|
| 382 |
"Data Access": "Open Access",
|
| 383 |
-
"Applications": "Temporal
|
| 384 |
"Clinical Stage": "Initial Assessment"
|
| 385 |
},
|
| 386 |
"NUBES": {
|
|
@@ -396,7 +396,7 @@
|
|
| 396 |
"Task Type": "Summarization",
|
| 397 |
"Clinical Context": "General",
|
| 398 |
"Data Access": "Open Access",
|
| 399 |
-
"Applications": "
|
| 400 |
"Clinical Stage": "Initial Assessment"
|
| 401 |
},
|
| 402 |
"MEDIQA 2023-sum-A": {
|
|
@@ -412,7 +412,7 @@
|
|
| 412 |
"Task Type": "Summarization",
|
| 413 |
"Clinical Context": "General",
|
| 414 |
"Data Access": "Open Access",
|
| 415 |
-
"Applications": "
|
| 416 |
"Clinical Stage": "Initial Assessment"
|
| 417 |
},
|
| 418 |
"RuMedDaNet": {
|
|
@@ -468,7 +468,7 @@
|
|
| 468 |
"Task Type": "Summarization",
|
| 469 |
"Clinical Context": "Pediatrics",
|
| 470 |
"Data Access": "Open Access",
|
| 471 |
-
"Applications": "
|
| 472 |
"Clinical Stage": "Initial Assessment"
|
| 473 |
},
|
| 474 |
"IMCS-V2-DAC": {
|
|
@@ -539,7 +539,7 @@
|
|
| 539 |
"Language": "Russian",
|
| 540 |
"Task Type": "Natural Language Inference",
|
| 541 |
"Clinical Context": "Critical Care",
|
| 542 |
-
"Data Access": "
|
| 543 |
"Applications": "Semantic relation",
|
| 544 |
"Clinical Stage": "Research"
|
| 545 |
},
|
|
@@ -615,7 +615,7 @@
|
|
| 615 |
"Applications": "ADE & Incidents",
|
| 616 |
"Clinical Stage": "Treatment and Intervention"
|
| 617 |
},
|
| 618 |
-
"IFMIR
|
| 619 |
"Language": "Japanese",
|
| 620 |
"Task Type": "Event Extraction",
|
| 621 |
"Clinical Context": "Pharmacology",
|
|
@@ -694,5 +694,21 @@
|
|
| 694 |
"Data Access": "Regulated",
|
| 695 |
"Applications": "Diagnosis",
|
| 696 |
"Clinical Stage": "Diagnosis and Prognosis"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
}
|
| 698 |
}
|
|
|
|
| 40 |
"Clinical Stage": "Diagnosis and Prognosis"
|
| 41 |
},
|
| 42 |
"Brateca-Hospitalization": {
|
| 43 |
+
"Language": "Portuguese",
|
| 44 |
"Task Type": "Text Classification",
|
| 45 |
"Clinical Context": "General",
|
| 46 |
"Data Access": "Regulated",
|
|
|
|
| 48 |
"Clinical Stage": "Diagnosis and Prognosis"
|
| 49 |
},
|
| 50 |
"Brateca-Mortality": {
|
| 51 |
+
"Language": "Portuguese",
|
| 52 |
"Task Type": "Text Classification",
|
| 53 |
"Clinical Context": "General",
|
| 54 |
"Data Access": "Regulated",
|
|
|
|
| 116 |
"Task Type": "Event Extraction",
|
| 117 |
"Clinical Context": "General",
|
| 118 |
"Data Access": "Open Access",
|
| 119 |
+
"Applications": "Temporal/Causality determination",
|
| 120 |
"Clinical Stage": "Initial Assessment"
|
| 121 |
},
|
| 122 |
"C-EMRS": {
|
|
|
|
| 356 |
"Task Type": "Named Entity Recognition",
|
| 357 |
"Clinical Context": "Pediatrics, Psychology",
|
| 358 |
"Data Access": "Open Access",
|
| 359 |
+
"Applications": "Temporal/Causality determination",
|
| 360 |
"Clinical Stage": "Initial Assessment"
|
| 361 |
},
|
| 362 |
"n2c2 2018-ADE&medication": {
|
|
|
|
| 372 |
"Task Type": "Named Entity Recognition",
|
| 373 |
"Clinical Context": "Cardiology",
|
| 374 |
"Data Access": "Open Access",
|
| 375 |
+
"Applications": "Temporal/Causality determination",
|
| 376 |
"Clinical Stage": "Initial Assessment"
|
| 377 |
},
|
| 378 |
"NorSynthClinical-RE": {
|
|
|
|
| 380 |
"Task Type": "Event Extraction",
|
| 381 |
"Clinical Context": "Cardiology",
|
| 382 |
"Data Access": "Open Access",
|
| 383 |
+
"Applications": "Temporal/Causality determination",
|
| 384 |
"Clinical Stage": "Initial Assessment"
|
| 385 |
},
|
| 386 |
"NUBES": {
|
|
|
|
| 396 |
"Task Type": "Summarization",
|
| 397 |
"Clinical Context": "General",
|
| 398 |
"Data Access": "Open Access",
|
| 399 |
+
"Applications": "Encounter summarization",
|
| 400 |
"Clinical Stage": "Initial Assessment"
|
| 401 |
},
|
| 402 |
"MEDIQA 2023-sum-A": {
|
|
|
|
| 412 |
"Task Type": "Summarization",
|
| 413 |
"Clinical Context": "General",
|
| 414 |
"Data Access": "Open Access",
|
| 415 |
+
"Applications": "Encounter summarization",
|
| 416 |
"Clinical Stage": "Initial Assessment"
|
| 417 |
},
|
| 418 |
"RuMedDaNet": {
|
|
|
|
| 468 |
"Task Type": "Summarization",
|
| 469 |
"Clinical Context": "Pediatrics",
|
| 470 |
"Data Access": "Open Access",
|
| 471 |
+
"Applications": "Encounter summarization",
|
| 472 |
"Clinical Stage": "Initial Assessment"
|
| 473 |
},
|
| 474 |
"IMCS-V2-DAC": {
|
|
|
|
| 539 |
"Language": "Russian",
|
| 540 |
"Task Type": "Natural Language Inference",
|
| 541 |
"Clinical Context": "Critical Care",
|
| 542 |
+
"Data Access": "Regulated",
|
| 543 |
"Applications": "Semantic relation",
|
| 544 |
"Clinical Stage": "Research"
|
| 545 |
},
|
|
|
|
| 615 |
"Applications": "ADE & Incidents",
|
| 616 |
"Clinical Stage": "Treatment and Intervention"
|
| 617 |
},
|
| 618 |
+
"IFMIR-NER&factuality": {
|
| 619 |
"Language": "Japanese",
|
| 620 |
"Task Type": "Event Extraction",
|
| 621 |
"Clinical Context": "Pharmacology",
|
|
|
|
| 694 |
"Data Access": "Regulated",
|
| 695 |
"Applications": "Diagnosis",
|
| 696 |
"Clinical Stage": "Diagnosis and Prognosis"
|
| 697 |
+
},
|
| 698 |
+
"MIMIC-III Outcome.Diagnosis": {
|
| 699 |
+
"Language": "English",
|
| 700 |
+
"Task Type": "Normalization and Coding",
|
| 701 |
+
"Clinical Context": "Critical Care",
|
| 702 |
+
"Data Access": "Regulated",
|
| 703 |
+
"Applications": "Prognosis",
|
| 704 |
+
"Clinical Stage": "Diagnosis and Prognosis"
|
| 705 |
+
},
|
| 706 |
+
"MIMIC-III Outcome.Procedure": {
|
| 707 |
+
"Language": "English",
|
| 708 |
+
"Task Type": "Normalization and Coding",
|
| 709 |
+
"Clinical Context": "Critical Care",
|
| 710 |
+
"Data Access": "Regulated",
|
| 711 |
+
"Applications": "Prognosis",
|
| 712 |
+
"Clinical Stage": "Diagnosis and Prognosis"
|
| 713 |
}
|
| 714 |
}
|
utils/data_loader.py
CHANGED
|
@@ -22,9 +22,9 @@ class LeaderboardDataLoader:
|
|
| 22 |
def _load_leaderboard_data(self) -> Dict[str, Dict]:
|
| 23 |
"""Load all leaderboard JSON data"""
|
| 24 |
leaderboard_files = {
|
| 25 |
-
'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
|
| 26 |
-
'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
|
| 27 |
-
'cot': 'leaderboards/CoT_leaderboard_data.json'
|
| 28 |
}
|
| 29 |
|
| 30 |
data = {}
|
|
@@ -38,13 +38,12 @@ class LeaderboardDataLoader:
|
|
| 38 |
"""Create pandas DataFrames from JSON data"""
|
| 39 |
dataframes = {}
|
| 40 |
for key in ['zero_shot', 'few_shot', 'cot']:
|
| 41 |
-
json_file = f"leaderboards/{key.replace('_', '-').title()}_leaderboard_data.json"
|
| 42 |
if key == 'few_shot':
|
| 43 |
-
json_file = "leaderboards/Few-Shot_leaderboard_data.json"
|
| 44 |
elif key == 'cot':
|
| 45 |
-
json_file = "leaderboards/CoT_leaderboard_data.json"
|
| 46 |
else:
|
| 47 |
-
json_file = "leaderboards/Zero-Shot_leaderboard_data.json"
|
| 48 |
|
| 49 |
dataframes[key] = pd.read_json(self.abs_path / json_file, precise_float=True)
|
| 50 |
|
|
|
|
| 22 |
def _load_leaderboard_data(self) -> Dict[str, Dict]:
|
| 23 |
"""Load all leaderboard JSON data"""
|
| 24 |
leaderboard_files = {
|
| 25 |
+
'zero_shot': 'leaderboards/Zero-Shot_leaderboard.json',
|
| 26 |
+
'few_shot': 'leaderboards/Few-Shot_leaderboard.json',
|
| 27 |
+
'cot': 'leaderboards/CoT_leaderboard.json'
|
| 28 |
}
|
| 29 |
|
| 30 |
data = {}
|
|
|
|
| 38 |
"""Create pandas DataFrames from JSON data"""
|
| 39 |
dataframes = {}
|
| 40 |
for key in ['zero_shot', 'few_shot', 'cot']:
|
|
|
|
| 41 |
if key == 'few_shot':
|
| 42 |
+
json_file = "leaderboards/Few-Shot_leaderboard.json"
|
| 43 |
elif key == 'cot':
|
| 44 |
+
json_file = "leaderboards/CoT_leaderboard.json"
|
| 45 |
else:
|
| 46 |
+
json_file = "leaderboards/Zero-Shot_leaderboard.json"
|
| 47 |
|
| 48 |
dataframes[key] = pd.read_json(self.abs_path / json_file, precise_float=True)
|
| 49 |
|