Spaces:
Running
Running
Update src/helper.py
Browse files- src/helper.py +14 -4
src/helper.py
CHANGED
|
@@ -5,7 +5,7 @@ import json
|
|
| 5 |
import numpy as np
|
| 6 |
from statistics import mean
|
| 7 |
import re
|
| 8 |
-
from datasets import load_dataset
|
| 9 |
import os
|
| 10 |
from collections import defaultdict
|
| 11 |
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
|
@@ -53,13 +53,23 @@ TASK_TO_CLUSTER_MAP = {
|
|
| 53 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
| 54 |
|
| 55 |
def load_private_leaderboard_df():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
ds = load_dataset(
|
| 57 |
path=SAHARA_DATA,
|
| 58 |
name=None,
|
| 59 |
-
data_files=
|
| 60 |
split="train",
|
| 61 |
download_mode="force_redownload"
|
| 62 |
)
|
|
|
|
| 63 |
return ds.to_pandas()
|
| 64 |
metrics_list={
|
| 65 |
'bleu_1k':'spBleu<sup>1K</sup>',
|
|
@@ -155,9 +165,9 @@ def compare_models(model_1_name, model_2_name):
|
|
| 155 |
if pd.isna(d):
|
| 156 |
return "---"
|
| 157 |
if d > 0.001: # Model 1 is better
|
| 158 |
-
return f"<span style='color:green; font-weight:bold;'>+{d:.2f}</span>"
|
| 159 |
elif d < -0.001: # Model 2 is better
|
| 160 |
-
return f"<span style='color:red; font-weight:bold;'>{d:.2f}</span>"
|
| 161 |
else:
|
| 162 |
return f"{d:.2f}"
|
| 163 |
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
from statistics import mean
|
| 7 |
import re
|
| 8 |
+
from datasets import load_dataset, concatenate_datasets
|
| 9 |
import os
|
| 10 |
from collections import defaultdict
|
| 11 |
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
|
|
|
| 53 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
| 54 |
|
| 55 |
def load_private_leaderboard_df():
|
| 56 |
+
|
| 57 |
+
all_repo_files = API.list_repo_files(repo_id=SAHARA_DATA, repo_type="dataset")
|
| 58 |
+
folder_path = "data/users/"
|
| 59 |
+
jsonl_files_in_folder = [
|
| 60 |
+
f for f in all_repo_files
|
| 61 |
+
if f.startswith(folder_path) and f.endswith(".jsonl")
|
| 62 |
+
]
|
| 63 |
+
jsonl_files_in_folder.append(SAHARA_RESULTS)
|
| 64 |
+
print("++++++",jsonl_files_in_folder)
|
| 65 |
ds = load_dataset(
|
| 66 |
path=SAHARA_DATA,
|
| 67 |
name=None,
|
| 68 |
+
data_files=jsonl_files_in_folder,
|
| 69 |
split="train",
|
| 70 |
download_mode="force_redownload"
|
| 71 |
)
|
| 72 |
+
print(">>>>>>>", ds)
|
| 73 |
return ds.to_pandas()
|
| 74 |
metrics_list={
|
| 75 |
'bleu_1k':'spBleu<sup>1K</sup>',
|
|
|
|
| 165 |
if pd.isna(d):
|
| 166 |
return "---"
|
| 167 |
if d > 0.001: # Model 1 is better
|
| 168 |
+
return f"<span style='color:green !important; font-weight:bold !important;'>+{d:.2f}</span>"
|
| 169 |
elif d < -0.001: # Model 2 is better
|
| 170 |
+
return f"<span style='color:red !important; font-weight:bold !important;'>{d:.2f}</span>"
|
| 171 |
else:
|
| 172 |
return f"{d:.2f}"
|
| 173 |
|