Sfarzi committed · Commit e6c5ca5 · 1 Parent(s): 16ce187

Initial clone with modifications

.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -52,7 +52,7 @@ def create_best_model_comparison_table(dataframe, lang: str | None = None, shot:
     - lang in {EN, IT, SL, SK, GR, PL} or None/"All"
     - shot in {"0","10"} or None/"All" (mapped to IS_FS False/True)
     """
-    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
     df = dataframe.copy()
 
     if lang and lang != "All" and "LANG" in df.columns:
@@ -141,7 +141,7 @@ def create_best_model_comparison_table_without_lang(dataframe):
     Table with the best overall model per task (NER, REL,) and the model that
     achieves the best score with its own best prompt.
     """
-    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
     table_data = {'Task': [], 'Best Overall Model': [], 'CPS': [], 'Best Prompt Model': [], 'Acc.': []}
 
     for task in tasks:
@@ -216,7 +216,7 @@ def create_prompt_heatmap(dataframe, lang: str | None = None, shot: str | None =
     - lang: None or one of EN/IT/SL/SK/GR/PL (None means All)
     - shot: None or "0"/"10" (None means All) mapped to IS_FS False/True
     """
-    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    tasks = ["NER-E3C", "REL-E3C", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHA"]
 
     df = dataframe.copy()
     # Language filter
@@ -236,6 +236,8 @@ def create_prompt_heatmap(dataframe, lang: str | None = None, shot: str | None =
     for task in tasks:
         col = f"{task} Best Prompt Id"
         if col in df.columns:
+            #print (col)
+            #print(df[col])
            all_ids.update(df[col].dropna().unique())
    prompt_ids_raw = sorted(list(all_ids), key=lambda x: int(re.sub(r'[^0-9]', '', str(x)) or 0))
    prompt_ids_raw = [pid for pid in prompt_ids_raw if label_for(pid) in {"p1", "p2", "p3"}] or [1, 2, 3]
@@ -294,7 +296,7 @@ def create_prompt_heatmap_without_lang(dataframe):
     for tasks NER and REL, with exactly 3 prompts (p1, p2, p3). It supports columns storing
     ids as integers (1/2/3) or strings ('p1'/'p2'/'p3').
     """
-    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
 
     # Collect unique prompt ids as they appear (int or 'pX'); restrict to 3 prompts
     all_ids = set()
@@ -395,10 +397,11 @@ def mean_of_max_per_field(df):
         float: media dei valori massimi dei campi
     """
     #fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
-    fields = ["NER", "REL", "RML", "DIA", "HIS"]
+    fields = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"]
     #print(df.columns)
 
     # Controlla che tutte le colonne esistano nel DataFrame
+    print(df.columns)
     missing = [f for f in fields if f not in df.columns]
     if missing:
         raise ValueError(f"Le seguenti colonne mancano nel DataFrame: {missing}")
@@ -414,7 +417,7 @@ def mean_of_max_per_field(df):
 
 def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
     if tasks is None:
-        tasks = [ "NER", "REL", "RML", "DIA", "HIS"]
+        tasks = [ "NER-E3C", "REL-E3C", "RML-CRF", "DIA-CRF", "HIS-CRF","NER-PHA"]
 
     task_means = {}
 
@@ -489,7 +492,7 @@ def boxplot_per_task(dataframe=None, baselines=None, references=None):
     #print(dataframe.columns)
 
     #tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
-    tasks =["NER", "REL", "RML", "HIS", "DIA"]
+    tasks =["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA" , "NER-PHA"]
     if dataframe is None:
         np.random.seed(42)
         dataframe = pd.DataFrame({
@@ -598,7 +601,7 @@ REFERENCES = {
 
 def boxplot_prompts_per_task(dataframe, tasks=None):
     if tasks is None:
-        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C"]
 
     # Lista delle colonne da aggiornare
     cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
@@ -808,11 +811,13 @@ TASK_METADATA_MULTIPLECHOICE = {
 # Define task metadata (icons, names, descriptions)
 TASK_METADATA_GENERATIVE = {
 
-    "NER": {"icon": "🏷️", "name": "Named Entity Recognition", "tooltip": ""},
-    "REL": {"icon": "🔗", "name": "Relation Extraction", "tooltip": ""},
-    "RML": {"icon": "😃", "name": "CRF RML", "tooltip": "CRF RML"},
-    "DIA": {"icon": "🏥", "name": "CRF Diagnosis", "tooltip": "CRF Diagnosis"},
-    "HIS": {"icon": "📝", "name": "CRF History", "tooltip": "CRF History"},
+    "NER-E3C": {"icon": "🏷️", "name": "Named Entity Recognition", "tooltip": ""},
+    "REL-E3C": {"icon": "🔗", "name": "Relation Extraction", "tooltip": ""},
+    "CRF-RML": {"icon": "🔗", "name": "CRF RML", "tooltip": "CRF RML"},
+    "CRF-DIA": {"icon": "🏥", "name": "CRF Diagnosis", "tooltip": "CRF Diagnosis"},
+    "CRF-HIS": {"icon": "📝", "name": "CRF History", "tooltip": "CRF History"},
+    "NER-PHA": {"icon": "🏷️", "name": "Named Entity Recognition over PharmaER.It Datasets", "tooltip": ""},
+
 }
 
 def restart_space():
@@ -891,7 +896,7 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
         hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS): "),
-            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languges: "),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languages: "),
             ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max = 100, default = [0,100], label="Select the number of parameters (B)"),
         ],
         bool_checkboxgroup_label="Evaluation Mode",
@@ -975,7 +980,7 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
         hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS): "),
-            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languges: "),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languages: "),
 
             ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=100, default=[0, 100],
                          label="Select the number of parameters (B)"),
@@ -1075,8 +1080,8 @@ with demo:
 
     leaderboard = init_leaderboard(
         LEADERBOARD_DF,
-        default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML", "DIA", "HIS"],
-        hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML", "DIA", "HIS"]]
+        default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"],
+        hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"]]
     )
 
 
@@ -1107,7 +1112,7 @@ with demo:
             with gr.TabItem(f"{metadata['icon']}{task}"):
                 task_description = TASK_DESCRIPTIONS.get(task, "Description not available.")
                 gr.Markdown(task_description, elem_classes="markdown-text1")
-                #print (LEADERBOARD_DF)
+                print (task)
                 leaderboard = update_task_leaderboard(
                     LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average",
                                                    f"{task} Prompt Std": "Prompt Std",
app.py CHANGED
@@ -52,7 +52,7 @@ def create_best_model_comparison_table(dataframe, lang: str | None = None, shot:
     - lang in {EN, IT, SL, SK, GR, PL} or None/"All"
     - shot in {"0","10"} or None/"All" (mapped to IS_FS False/True)
     """
-    tasks = ["NER", "REL", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHARMAER"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
     df = dataframe.copy()
 
     if lang and lang != "All" and "LANG" in df.columns:
@@ -141,7 +141,7 @@ def create_best_model_comparison_table_without_lang(dataframe):
     Table with the best overall model per task (NER, REL,) and the model that
     achieves the best score with its own best prompt.
     """
-    tasks = ["NER", "REL", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHARMAER"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
     table_data = {'Task': [], 'Best Overall Model': [], 'CPS': [], 'Best Prompt Model': [], 'Acc.': []}
 
     for task in tasks:
@@ -216,7 +216,7 @@ def create_prompt_heatmap(dataframe, lang: str | None = None, shot: str | None =
     - lang: None or one of EN/IT/SL/SK/GR/PL (None means All)
     - shot: None or "0"/"10" (None means All) mapped to IS_FS False/True
     """
-    tasks = ["NER", "REL", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHARMAER"]
+    tasks = ["NER-E3C", "REL-E3C", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHA"]
 
     df = dataframe.copy()
     # Language filter
@@ -296,7 +296,7 @@ def create_prompt_heatmap_without_lang(dataframe):
     for tasks NER and REL, with exactly 3 prompts (p1, p2, p3). It supports columns storing
     ids as integers (1/2/3) or strings ('p1'/'p2'/'p3').
     """
-    tasks = ["NER", "REL", "RML-CRF", "HIS-CRF", "DIA-CRF", "NER-PHARMAER"]
+    tasks = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA", "NER-PHA"]
 
     # Collect unique prompt ids as they appear (int or 'pX'); restrict to 3 prompts
     all_ids = set()
@@ -397,7 +397,7 @@ def mean_of_max_per_field(df):
         float: media dei valori massimi dei campi
     """
     #fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
-    fields = ["NER", "REL", "RML-CRF", "DIA-CRF", "HIS-CRF", "NER-PHARMAER"]
+    fields = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"]
     #print(df.columns)
 
     # Controlla che tutte le colonne esistano nel DataFrame
@@ -417,7 +417,7 @@ def mean_of_max_per_field(df):
 
 def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
     if tasks is None:
-        tasks = [ "NER", "REL", "RML-CRF", "DIA-CRF", "HIS-CRF","NER-PHARMAER"]
+        tasks = [ "NER-E3C", "REL-E3C", "RML-CRF", "DIA-CRF", "HIS-CRF","NER-PHA"]
 
     task_means = {}
 
@@ -492,7 +492,7 @@ def boxplot_per_task(dataframe=None, baselines=None, references=None):
     #print(dataframe.columns)
 
     #tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
-    tasks =["NER", "REL", "RML-CRF", "HIS-CRF", "DIA-CRF" , "NER-PHARMAER"]
+    tasks =["NER-E3C", "REL-E3C", "CRF-RML", "CRF-HIS", "CRF-DIA" , "NER-PHA"]
     if dataframe is None:
         np.random.seed(42)
         dataframe = pd.DataFrame({
@@ -601,7 +601,7 @@ REFERENCES = {
 
 def boxplot_prompts_per_task(dataframe, tasks=None):
     if tasks is None:
-        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C"]
 
     # Lista delle colonne da aggiornare
     cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
@@ -811,12 +811,12 @@ TASK_METADATA_MULTIPLECHOICE = {
 # Define task metadata (icons, names, descriptions)
 TASK_METADATA_GENERATIVE = {
 
-    "NER": {"icon": "🏷️", "name": "Named Entity Recognition", "tooltip": ""},
-    "REL": {"icon": "🔗", "name": "Relation Extraction", "tooltip": ""},
-    "RML-CRF": {"icon": "😃", "name": "CRF RML", "tooltip": "CRF RML"},
-    "DIA-CRF": {"icon": "🏥", "name": "CRF Diagnosis", "tooltip": "CRF Diagnosis"},
-    "HIS-CRF": {"icon": "📝", "name": "CRF History", "tooltip": "CRF History"},
-    "NER-PHARMAER": {"icon": "🏷️", "name": "Named Entity Recognition over PharmaER.It Datasets", "tooltip": ""},
+    "NER-E3C": {"icon": "🏷️", "name": "Named Entity Recognition", "tooltip": ""},
+    "REL-E3C": {"icon": "🔗", "name": "Relation Extraction", "tooltip": ""},
+    "CRF-RML": {"icon": "🔗", "name": "CRF RML", "tooltip": "CRF RML"},
+    "CRF-DIA": {"icon": "🏥", "name": "CRF Diagnosis", "tooltip": "CRF Diagnosis"},
+    "CRF-HIS": {"icon": "📝", "name": "CRF History", "tooltip": "CRF History"},
+    "NER-PHA": {"icon": "🏷️", "name": "Named Entity Recognition over PharmaER.It Datasets", "tooltip": ""},
 
 }
 
@@ -896,7 +896,7 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
         hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS): "),
-            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languges: "),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languages: "),
             ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max = 100, default = [0,100], label="Select the number of parameters (B)"),
         ],
         bool_checkboxgroup_label="Evaluation Mode",
@@ -980,7 +980,7 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
         hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS): "),
-            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languges: "),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languages: "),
 
             ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=100, default=[0, 100],
                          label="Select the number of parameters (B)"),
@@ -1080,8 +1080,8 @@ with demo:
 
     leaderboard = init_leaderboard(
         LEADERBOARD_DF,
-        default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML-CRF", "DIA-CRF", "HIS-CRF", "NER-PHARMAER"],
-        hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML-CRF", "DIA-CRF", "HIS-CRF", "NER-PHARMAER"]]
+        default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"],
+        hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"]]
     )
 
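Note: mean_of_max_per_field, patched in both copies of the app above, returns the mean of the per-task maxima, i.e. the average of the best score any model reaches on each task. A minimal sketch of that computation under the new column names, using made-up scores:

import pandas as pd

fields = ["NER-E3C", "REL-E3C", "CRF-RML", "CRF-DIA", "CRF-HIS", "NER-PHA"]

# Toy leaderboard: one row per model, one column per task (hypothetical scores).
df = pd.DataFrame({
    "NER-E3C": [55.0, 61.2], "REL-E3C": [40.1, 38.9], "CRF-RML": [70.0, 72.5],
    "CRF-DIA": [66.3, 64.0], "CRF-HIS": [58.8, 60.1], "NER-PHA": [49.5, 52.0],
})

# Fail loudly if any expected task column is absent, as the app does.
missing = [f for f in fields if f not in df.columns]
if missing:
    raise ValueError(f"The following columns are missing from the DataFrame: {missing}")

# Best score per task, then the mean of those maxima.
print(round(df[fields].max().mean(), 2))  # 58.7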
src/__pycache__/about.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/about.cpython-310.pyc and b/src/__pycache__/about.cpython-310.pyc differ
 
src/about.py CHANGED
@@ -72,43 +72,43 @@ class Tasks(Enum):
     #task48 = Task("relation-extraction_5", "acc", "std_accuracy", "REL Prompt Std")
     #task49 = Task("relation-extraction_3", "acc", "best_prompt", "REL Best Prompt")
     #task50 = Task("relation-extraction_4", "acc", "prompt_id", "REL Best Prompt Id")
-    task1 = Task("RE_1", "acc", "CPS", "REL")
-    task2 = Task("RE_2", "acc", "average_accuracy", "REL Prompt Average")
-    task3 = Task("RE_5", "acc", "std_accuracy", "REL Prompt Std")
-    task4 = Task("RE_3", "acc", "best_prompt", "REL Best Prompt")
-    task5 = Task("RE_4", "acc", "prompt_id", "REL Best Prompt Id")
-
-    task6 = Task("NER_1", "acc", "CPS", "NER")
-    task7 = Task("NER_2", "acc", "average_accuracy", "NER Prompt Average")
-    task8 = Task("NER_3", "acc", "std_accuracy", "NER Prompt Std")
-    task9 = Task("NER_4", "acc", "best_prompt", "NER Best Prompt")
-    task10 = Task("NER_5", "acc", "prompt_id", "NER Best Prompt Id")
-
-    task11 = Task("RML-CRF_1", "acc", "CPS", "RML-CRF")
-    task12 = Task("RML-CRF_2", "acc", "average_accuracy", "RML-CRF Prompt Average")
-    task13 = Task("RML-CRF_3", "acc", "std_accuracy", "RML-CRF Prompt Std")
-    task14 = Task("RML-CRF_4", "acc", "best_prompt", "RML-CRF Best Prompt")
-    task15 = Task("RML-CRF_5", "acc", "prompt_id", "RML-CRF Best Prompt Id")
-
-
-
-    task16 = Task("DIA-CRF_1", "acc", "CPS", "DIA-CRF")
-    task17 = Task("DIA-CRF_2", "acc", "average_accuracy", "DIA-CRF Prompt Average")
-    task18 = Task("DIA-CRF_3", "acc", "std_accuracy", "DIA-CRF Prompt Std")
-    task19 = Task("DIA-CRF_4", "acc", "best_prompt", "DIA-CRF Best Prompt")
-    task20 = Task("DIA-CRF_5", "acc", "prompt_id", "DIA-CRF Best Prompt Id")
-
-    task21 = Task("HIS-CRF_1", "acc", "CPS", "HIS-CRF")
-    task22 = Task("HIS-CRF_2", "acc", "average_accuracy", "HIS-CRF Prompt Average")
-    task23 = Task("HIS-CRF_3", "acc", "std_accuracy", "HIS-CRF Prompt Std")
-    task24 = Task("HIS-CRF_4", "acc", "best_prompt", "HIS-CRF Best Prompt")
-    task25 = Task("HIS-CRF_5", "acc", "prompt_id", "HIS-CRF Best Prompt Id")
-
-    task26 = Task("NER-PHARMAER_1", "acc", "CPS", "NER-PHARMAER")
-    task27 = Task("NER-PHARMAER_2", "acc", "average_accuracy", "NER-PHARMAER Prompt Average")
-    task28 = Task("NER-PHARMAER_3", "acc", "std_accuracy", "NER-PHARMAER Prompt Std")
-    task29 = Task("NER-PHARMAER_4", "acc", "best_prompt", "NER-PHARMAER Best Prompt")
-    task30 = Task("NER-PHARMAER_5", "acc", "prompt_id", "NER-PHARMAER Best Prompt Id")
+    task1 = Task("RE_1", "acc", "CPS", "REL-E3C")
+    task2 = Task("RE_2", "acc", "average_accuracy", "REL-E3C Prompt Average")
+    task3 = Task("RE_5", "acc", "std_accuracy", "REL-E3C Prompt Std")
+    task4 = Task("RE_3", "acc", "best_prompt", "REL-E3C Best Prompt")
+    task5 = Task("RE_4", "acc", "prompt_id", "REL-E3C Best Prompt Id")
+
+    task6 = Task("NER_1", "acc", "CPS", "NER-E3C")
+    task7 = Task("NER_2", "acc", "average_accuracy", "NER-E3C Prompt Average")
+    task8 = Task("NER_3", "acc", "std_accuracy", "NER-E3C Prompt Std")
+    task9 = Task("NER_4", "acc", "best_prompt", "NER-E3C Best Prompt")
+    task10 = Task("NER_5", "acc", "prompt_id", "NER-E3C Best Prompt Id")
+
+    task11 = Task("RML-CRF_1", "acc", "CPS", "CRF-RML")
+    task12 = Task("RML-CRF_2", "acc", "average_accuracy", "CRF-RML Prompt Average")
+    task13 = Task("RML-CRF_3", "acc", "std_accuracy", "CRF-RML Prompt Std")
+    task14 = Task("RML-CRF_4", "acc", "best_prompt", "CRF-RML Best Prompt")
+    task15 = Task("RML-CRF_5", "acc", "prompt_id", "CRF-RML Best Prompt Id")
+
+
+
+    task16 = Task("DIA-CRF_1", "acc", "CPS", "CRF-DIA")
+    task17 = Task("DIA-CRF_2", "acc", "average_accuracy", "CRF-DIA Prompt Average")
+    task18 = Task("DIA-CRF_3", "acc", "std_accuracy", "CRF-DIA Prompt Std")
+    task19 = Task("DIA-CRF_4", "acc", "best_prompt", "CRF-DIA Best Prompt")
+    task20 = Task("DIA-CRF_5", "acc", "prompt_id", "CRF-DIA Best Prompt Id")
+
+    task21 = Task("HIS-CRF_1", "acc", "CPS", "CRF-HIS")
+    task22 = Task("HIS-CRF_2", "acc", "average_accuracy", "CRF-HIS Prompt Average")
+    task23 = Task("HIS-CRF_3", "acc", "std_accuracy", "CRF-HIS Prompt Std")
+    task24 = Task("HIS-CRF_4", "acc", "best_prompt", "CRF-HIS Best Prompt")
+    task25 = Task("HIS-CRF_5", "acc", "prompt_id", "CRF-HIS Best Prompt Id")
+
+    task26 = Task("NER-PHARMAER_1", "acc", "CPS", "NER-PHA")
+    task27 = Task("NER-PHARMAER_2", "acc", "average_accuracy", "NER-PHA Prompt Average")
+    task28 = Task("NER-PHARMAER_3", "acc", "std_accuracy", "NER-PHA Prompt Std")
+    task29 = Task("NER-PHARMAER_4", "acc", "best_prompt", "NER-PHA Best Prompt")
+    task30 = Task("NER-PHARMAER_5", "acc", "prompt_id", "NER-PHA Best Prompt Id")
 
     '''
     task0 = Task("TextualEntailment", "acc", "Textual Entailment")
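Note: each Tasks member above wraps a Task value whose last field is the leaderboard column header, which is why renaming "REL" to "REL-E3C" here propagates to the column names used throughout app.py. A minimal sketch of that structure, assuming Task is a frozen dataclass with the four positional fields used above (the actual definition lives earlier in src/about.py and may differ):

from dataclasses import dataclass
from enum import Enum

@dataclass(frozen=True)
class Task:
    # Assumed field layout, matching the four positional values used above.
    benchmark: str  # key of the raw result, e.g. "RE_1"
    metric: str     # metric name, e.g. "acc"
    field: str      # statistic read from the results, e.g. "CPS"
    col_name: str   # leaderboard column header, e.g. "REL-E3C"

class Tasks(Enum):
    task1 = Task("RE_1", "acc", "CPS", "REL-E3C")
    task6 = Task("NER_1", "acc", "CPS", "NER-E3C")

# Each member maps a raw-result key to a display column:
for t in Tasks:
    print(t.value.benchmark, "->", t.value.col_name)  # RE_1 -> REL-E3C, NER_1 -> NER-E3C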