Spaces:
Sleeping
Sleeping
mj-new
committed on
Commit
·
7504a25
1
Parent(s):
db4641e
Added hardcoded list of configs for PELCRA due to gated access
Browse files
app.py
CHANGED
|
@@ -94,9 +94,13 @@ with analysis_bigos_pelcra:
|
|
| 94 |
|
| 95 |
dataset_short_name = "PELCRA"
|
| 96 |
|
| 97 |
-
|
|
|
|
| 98 |
# remove "all" subset, which is always the last config type
|
| 99 |
-
dataset_configs.pop()
|
|
|
|
|
|
|
|
|
|
| 100 |
print(dataset_configs)
|
| 101 |
# read the reports for public and secret datasets
|
| 102 |
[stats_dict_public, contents_dict_public] = read_reports(dataset_name)
|
|
|
|
| 94 |
|
| 95 |
dataset_short_name = "PELCRA"
|
| 96 |
|
| 97 |
+
# local version with granted gated access
|
| 98 |
+
#dataset_configs = get_dataset_config_names(dataset_name,trust_remote_code=True)
|
| 99 |
# remove "all" subset, which is always the last config type
|
| 100 |
+
#dataset_configs.pop()
|
| 101 |
+
|
| 102 |
+
# remote version with hardcoded access
|
| 103 |
+
dataset_configs = ['ul-diabiz_poleval-22', 'ul-spokes_mix_emo-18', 'ul-spokes_mix_luz-18', 'ul-spokes_mix_parl-18', 'ul-spokes_biz_bio-23', 'ul-spokes_biz_int-23', 'ul-spokes_biz_luz-23', 'ul-spokes_biz_pod-23', 'ul-spokes_biz_pres-23', 'ul-spokes_biz_vc-23', 'ul-spokes_biz_vc2-23', 'ul-spokes_biz_wyw-23']
|
| 104 |
print(dataset_configs)
|
| 105 |
# read the reports for public and secret datasets
|
| 106 |
[stats_dict_public, contents_dict_public] = read_reports(dataset_name)
|
utils.py
CHANGED
|
@@ -539,7 +539,7 @@ def dict_to_multindex_df_all_splits(dict_in):
|
|
| 539 |
return(df)
|
| 540 |
|
| 541 |
|
| 542 |
-
def extract_stats_to_agg(df_multindex_per_split, metrics):
|
| 543 |
# input - multiindex dataframe has three indexes - dataset, metric, split
|
| 544 |
|
| 545 |
# select only relevant metrics
|
|
@@ -558,6 +558,9 @@ def extract_stats_to_agg(df_multindex_per_split, metrics):
|
|
| 558 |
# move rows corresponding to specific metrics into specific columns
|
| 559 |
df_agg_splits = df_agg_splits.unstack(level ='metric')
|
| 560 |
df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
return(df_agg_splits)
|
| 563 |
|
|
@@ -577,7 +580,7 @@ def extract_stats_all_splits(df_multiindex_all_splits, metrics):
|
|
| 577 |
|
| 578 |
def extract_stats_for_dataset_card(df_multindex_per_split, subset, metrics, add_total=False):
|
| 579 |
|
| 580 |
-
print(df_multindex_per_split)
|
| 581 |
df_metrics_subset = df_multindex_per_split
|
| 582 |
|
| 583 |
df_metrics_subset = df_metrics_subset.unstack(level ='split')
|
|
|
|
| 539 |
return(df)
|
| 540 |
|
| 541 |
|
| 542 |
+
def extract_stats_to_agg(df_multindex_per_split, metrics, add_total=True):
|
| 543 |
# input - multiindex dataframe has three indexes - dataset, metric, split
|
| 544 |
|
| 545 |
# select only relevant metrics
|
|
|
|
| 558 |
# move rows corresponding to specific metrics into specific columns
|
| 559 |
df_agg_splits = df_agg_splits.unstack(level ='metric')
|
| 560 |
df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
|
| 561 |
+
|
| 562 |
+
if(add_total):
|
| 563 |
+
df_agg_splits = df_agg_splits
|
| 564 |
|
| 565 |
return(df_agg_splits)
|
| 566 |
|
|
|
|
| 580 |
|
| 581 |
def extract_stats_for_dataset_card(df_multindex_per_split, subset, metrics, add_total=False):
|
| 582 |
|
| 583 |
+
#print(df_multindex_per_split)
|
| 584 |
df_metrics_subset = df_multindex_per_split
|
| 585 |
|
| 586 |
df_metrics_subset = df_metrics_subset.unstack(level ='split')
|