Kevin Xie committed on
Commit
d4af204
·
1 Parent(s): 1470b41

Update leaderboard (7/22)

Browse files

Updated leaderboard with 9 additional model evaluations.

leaderboards/CoT_leaderboard.json ADDED
The diff for this file is too large to render. See raw diff
 
leaderboards/CoT_leaderboard_data.json DELETED
The diff for this file is too large to render. See raw diff
 
leaderboards/Few-Shot_leaderboard.json ADDED
The diff for this file is too large to render. See raw diff
 
leaderboards/Few-Shot_leaderboard_data.json DELETED
The diff for this file is too large to render. See raw diff
 
leaderboards/Zero-Shot_leaderboard.json ADDED
The diff for this file is too large to render. See raw diff
 
leaderboards/Zero-Shot_leaderboard_data.json DELETED
The diff for this file is too large to render. See raw diff
 
task_information.json CHANGED
@@ -40,7 +40,7 @@
40
  "Clinical Stage": "Diagnosis and Prognosis"
41
  },
42
  "Brateca-Hospitalization": {
43
- "Language": "Portuguese\n(Brazilian)",
44
  "Task Type": "Text Classification",
45
  "Clinical Context": "General",
46
  "Data Access": "Regulated",
@@ -48,7 +48,7 @@
48
  "Clinical Stage": "Diagnosis and Prognosis"
49
  },
50
  "Brateca-Mortality": {
51
- "Language": "Portuguese\n(Brazilian)",
52
  "Task Type": "Text Classification",
53
  "Clinical Context": "General",
54
  "Data Access": "Regulated",
@@ -116,7 +116,7 @@
116
  "Task Type": "Event Extraction",
117
  "Clinical Context": "General",
118
  "Data Access": "Open Access",
119
- "Applications": "Temporal & Causality relation",
120
  "Clinical Stage": "Initial Assessment"
121
  },
122
  "C-EMRS": {
@@ -356,7 +356,7 @@
356
  "Task Type": "Named Entity Recognition",
357
  "Clinical Context": "Pediatrics, Psychology",
358
  "Data Access": "Open Access",
359
- "Applications": "Temporal & Causality relation",
360
  "Clinical Stage": "Initial Assessment"
361
  },
362
  "n2c2 2018-ADE&medication": {
@@ -372,7 +372,7 @@
372
  "Task Type": "Named Entity Recognition",
373
  "Clinical Context": "Cardiology",
374
  "Data Access": "Open Access",
375
- "Applications": "Temporal & Causality relation",
376
  "Clinical Stage": "Initial Assessment"
377
  },
378
  "NorSynthClinical-RE": {
@@ -380,7 +380,7 @@
380
  "Task Type": "Event Extraction",
381
  "Clinical Context": "Cardiology",
382
  "Data Access": "Open Access",
383
- "Applications": "Temporal & Causality relation",
384
  "Clinical Stage": "Initial Assessment"
385
  },
386
  "NUBES": {
@@ -396,7 +396,7 @@
396
  "Task Type": "Summarization",
397
  "Clinical Context": "General",
398
  "Data Access": "Open Access",
399
- "Applications": "Consultation summarization",
400
  "Clinical Stage": "Initial Assessment"
401
  },
402
  "MEDIQA 2023-sum-A": {
@@ -412,7 +412,7 @@
412
  "Task Type": "Summarization",
413
  "Clinical Context": "General",
414
  "Data Access": "Open Access",
415
- "Applications": "Consultation summarization",
416
  "Clinical Stage": "Initial Assessment"
417
  },
418
  "RuMedDaNet": {
@@ -468,7 +468,7 @@
468
  "Task Type": "Summarization",
469
  "Clinical Context": "Pediatrics",
470
  "Data Access": "Open Access",
471
- "Applications": "Consultation summarization",
472
  "Clinical Stage": "Initial Assessment"
473
  },
474
  "IMCS-V2-DAC": {
@@ -539,7 +539,7 @@
539
  "Language": "Russian",
540
  "Task Type": "Natural Language Inference",
541
  "Clinical Context": "Critical Care",
542
- "Data Access": "Open Access",
543
  "Applications": "Semantic relation",
544
  "Clinical Stage": "Research"
545
  },
@@ -615,7 +615,7 @@
615
  "Applications": "ADE & Incidents",
616
  "Clinical Stage": "Treatment and Intervention"
617
  },
618
- "IFMIR - NER&factuality": {
619
  "Language": "Japanese",
620
  "Task Type": "Event Extraction",
621
  "Clinical Context": "Pharmacology",
@@ -694,5 +694,21 @@
694
  "Data Access": "Regulated",
695
  "Applications": "Diagnosis",
696
  "Clinical Stage": "Diagnosis and Prognosis"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  }
698
  }
 
40
  "Clinical Stage": "Diagnosis and Prognosis"
41
  },
42
  "Brateca-Hospitalization": {
43
+ "Language": "Portuguese",
44
  "Task Type": "Text Classification",
45
  "Clinical Context": "General",
46
  "Data Access": "Regulated",
 
48
  "Clinical Stage": "Diagnosis and Prognosis"
49
  },
50
  "Brateca-Mortality": {
51
+ "Language": "Portuguese",
52
  "Task Type": "Text Classification",
53
  "Clinical Context": "General",
54
  "Data Access": "Regulated",
 
116
  "Task Type": "Event Extraction",
117
  "Clinical Context": "General",
118
  "Data Access": "Open Access",
119
+ "Applications": "Temporal/Causality determination",
120
  "Clinical Stage": "Initial Assessment"
121
  },
122
  "C-EMRS": {
 
356
  "Task Type": "Named Entity Recognition",
357
  "Clinical Context": "Pediatrics, Psychology",
358
  "Data Access": "Open Access",
359
+ "Applications": "Temporal/Causality determination",
360
  "Clinical Stage": "Initial Assessment"
361
  },
362
  "n2c2 2018-ADE&medication": {
 
372
  "Task Type": "Named Entity Recognition",
373
  "Clinical Context": "Cardiology",
374
  "Data Access": "Open Access",
375
+ "Applications": "Temporal/Causality determination",
376
  "Clinical Stage": "Initial Assessment"
377
  },
378
  "NorSynthClinical-RE": {
 
380
  "Task Type": "Event Extraction",
381
  "Clinical Context": "Cardiology",
382
  "Data Access": "Open Access",
383
+ "Applications": "Temporal/Causality determination",
384
  "Clinical Stage": "Initial Assessment"
385
  },
386
  "NUBES": {
 
396
  "Task Type": "Summarization",
397
  "Clinical Context": "General",
398
  "Data Access": "Open Access",
399
+ "Applications": "Encounter summarization",
400
  "Clinical Stage": "Initial Assessment"
401
  },
402
  "MEDIQA 2023-sum-A": {
 
412
  "Task Type": "Summarization",
413
  "Clinical Context": "General",
414
  "Data Access": "Open Access",
415
+ "Applications": "Encounter summarization",
416
  "Clinical Stage": "Initial Assessment"
417
  },
418
  "RuMedDaNet": {
 
468
  "Task Type": "Summarization",
469
  "Clinical Context": "Pediatrics",
470
  "Data Access": "Open Access",
471
+ "Applications": "Encounter summarization",
472
  "Clinical Stage": "Initial Assessment"
473
  },
474
  "IMCS-V2-DAC": {
 
539
  "Language": "Russian",
540
  "Task Type": "Natural Language Inference",
541
  "Clinical Context": "Critical Care",
542
+ "Data Access": "Regulated",
543
  "Applications": "Semantic relation",
544
  "Clinical Stage": "Research"
545
  },
 
615
  "Applications": "ADE & Incidents",
616
  "Clinical Stage": "Treatment and Intervention"
617
  },
618
+ "IFMIR-NER&factuality": {
619
  "Language": "Japanese",
620
  "Task Type": "Event Extraction",
621
  "Clinical Context": "Pharmacology",
 
694
  "Data Access": "Regulated",
695
  "Applications": "Diagnosis",
696
  "Clinical Stage": "Diagnosis and Prognosis"
697
+ },
698
+ "MIMIC-III Outcome.Diagnosis": {
699
+ "Language": "English",
700
+ "Task Type": "Normalization and Coding",
701
+ "Clinical Context": "Critical Care",
702
+ "Data Access": "Regulated",
703
+ "Applications": "Prognosis",
704
+ "Clinical Stage": "Diagnosis and Prognosis"
705
+ },
706
+ "MIMIC-III Outcome.Procedure": {
707
+ "Language": "English",
708
+ "Task Type": "Normalization and Coding",
709
+ "Clinical Context": "Critical Care",
710
+ "Data Access": "Regulated",
711
+ "Applications": "Prognosis",
712
+ "Clinical Stage": "Diagnosis and Prognosis"
713
  }
714
  }
utils/data_loader.py CHANGED
@@ -22,9 +22,9 @@ class LeaderboardDataLoader:
22
  def _load_leaderboard_data(self) -> Dict[str, Dict]:
23
  """Load all leaderboard JSON data"""
24
  leaderboard_files = {
25
- 'zero_shot': 'leaderboards/Zero-Shot_leaderboard_data.json',
26
- 'few_shot': 'leaderboards/Few-Shot_leaderboard_data.json',
27
- 'cot': 'leaderboards/CoT_leaderboard_data.json'
28
  }
29
 
30
  data = {}
@@ -38,13 +38,12 @@ class LeaderboardDataLoader:
38
  """Create pandas DataFrames from JSON data"""
39
  dataframes = {}
40
  for key in ['zero_shot', 'few_shot', 'cot']:
41
- json_file = f"leaderboards/{key.replace('_', '-').title()}_leaderboard_data.json"
42
  if key == 'few_shot':
43
- json_file = "leaderboards/Few-Shot_leaderboard_data.json"
44
  elif key == 'cot':
45
- json_file = "leaderboards/CoT_leaderboard_data.json"
46
  else:
47
- json_file = "leaderboards/Zero-Shot_leaderboard_data.json"
48
 
49
  dataframes[key] = pd.read_json(self.abs_path / json_file, precise_float=True)
50
 
 
22
  def _load_leaderboard_data(self) -> Dict[str, Dict]:
23
  """Load all leaderboard JSON data"""
24
  leaderboard_files = {
25
+ 'zero_shot': 'leaderboards/Zero-Shot_leaderboard.json',
26
+ 'few_shot': 'leaderboards/Few-Shot_leaderboard.json',
27
+ 'cot': 'leaderboards/CoT_leaderboard.json'
28
  }
29
 
30
  data = {}
 
38
  """Create pandas DataFrames from JSON data"""
39
  dataframes = {}
40
  for key in ['zero_shot', 'few_shot', 'cot']:
 
41
  if key == 'few_shot':
42
+ json_file = "leaderboards/Few-Shot_leaderboard.json"
43
  elif key == 'cot':
44
+ json_file = "leaderboards/CoT_leaderboard.json"
45
  else:
46
+ json_file = "leaderboards/Zero-Shot_leaderboard.json"
47
 
48
  dataframes[key] = pd.read_json(self.abs_path / json_file, precise_float=True)
49