Spaces:
Runtime error
Runtime error
Gül Sena Altıntaş committed on
Commit ·
b3de8c3
1
Parent(s): 431425d
- Save with prefix
Browse files
- Two new models added
- .gitignore +1 -0
- app.py +17 -2
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
summaries/
|
app.py
CHANGED
|
@@ -109,6 +109,8 @@ TOKSUITE_MODELS = [
|
|
| 109 |
"mistralai-tekken",
|
| 110 |
"tokenmonster-englishcode-32000-consistent-v1",
|
| 111 |
"google-byt5-small",
|
|
|
|
|
|
|
| 112 |
]
|
| 113 |
# Global cache for loaded models
|
| 114 |
model_cache = dict()
|
|
@@ -440,6 +442,7 @@ def run_evaluation(
|
|
| 440 |
progress=gr.Progress(),
|
| 441 |
save_summary=False,
|
| 442 |
normalization_method: str = "token-length",
|
|
|
|
| 443 |
):
|
| 444 |
import gc
|
| 445 |
|
|
@@ -558,7 +561,10 @@ def run_evaluation(
|
|
| 558 |
csv_summary = generate_csv_summary(questions, results, summary_stats)
|
| 559 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 560 |
if save_summary and slurm_id:
|
| 561 |
-
|
|
|
|
|
|
|
|
|
|
| 562 |
f.write(markdown_summary)
|
| 563 |
|
| 564 |
return (
|
|
@@ -1151,6 +1157,7 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
|
|
| 1151 |
)
|
| 1152 |
with gr.Column(scale=1):
|
| 1153 |
save_summary_checkbox = False
|
|
|
|
| 1154 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 1155 |
if slurm_id:
|
| 1156 |
save_summary_checkbox = gr.Checkbox(
|
|
@@ -1159,12 +1166,19 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
|
|
| 1159 |
value=False,
|
| 1160 |
# info="If checked, saves a markdown summary file with SLURM_JOB_ID prefix",
|
| 1161 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1162 |
with gr.Row():
|
| 1163 |
with gr.Column(scale=2):
|
| 1164 |
toksuite_selector = gr.CheckboxGroup(
|
| 1165 |
label="Select toksuite models",
|
| 1166 |
choices=TOKSUITE_MODELS,
|
| 1167 |
-
value=TOKSUITE_MODELS
|
| 1168 |
interactive=True,
|
| 1169 |
info="These models share the same initialization and training source but differ only in their tokenizers. See [r-three/toksuite](https://huggingface.co/collections/r-three/toksuite-68ae7490c151341d78423295) for details.",
|
| 1170 |
)
|
|
@@ -1277,6 +1291,7 @@ bigscience/bloom-560m""",
|
|
| 1277 |
delimiter_selector,
|
| 1278 |
save_summary_checkbox,
|
| 1279 |
normalization_method,
|
|
|
|
| 1280 |
],
|
| 1281 |
outputs=[
|
| 1282 |
summary_output,
|
|
|
|
| 109 |
"mistralai-tekken",
|
| 110 |
"tokenmonster-englishcode-32000-consistent-v1",
|
| 111 |
"google-byt5-small",
|
| 112 |
+
"google-bert-bert-base-multilingual-cased",
|
| 113 |
+
"Qwen-Qwen3-8B",
|
| 114 |
]
|
| 115 |
# Global cache for loaded models
|
| 116 |
model_cache = dict()
|
|
|
|
| 442 |
progress=gr.Progress(),
|
| 443 |
save_summary=False,
|
| 444 |
normalization_method: str = "token-length",
|
| 445 |
+
prefix: str = "",
|
| 446 |
):
|
| 447 |
import gc
|
| 448 |
|
|
|
|
| 561 |
csv_summary = generate_csv_summary(questions, results, summary_stats)
|
| 562 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 563 |
if save_summary and slurm_id:
|
| 564 |
+
file_name = f"summaries/{slurm_id}_summary_{time()}.md"
|
| 565 |
+
if prefix:
|
| 566 |
+
file_name = f"summaries/{slurm_id}_{prefix}_summary_{time()}.md"
|
| 567 |
+
with open(file_name, "w") as f:
|
| 568 |
f.write(markdown_summary)
|
| 569 |
|
| 570 |
return (
|
|
|
|
| 1157 |
)
|
| 1158 |
with gr.Column(scale=1):
|
| 1159 |
save_summary_checkbox = False
|
| 1160 |
+
prefix = ""
|
| 1161 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 1162 |
if slurm_id:
|
| 1163 |
save_summary_checkbox = gr.Checkbox(
|
|
|
|
| 1166 |
value=False,
|
| 1167 |
# info="If checked, saves a markdown summary file with SLURM_JOB_ID prefix",
|
| 1168 |
)
|
| 1169 |
+
prefix = gr.Textbox(
|
| 1170 |
+
label="Filename Prefix",
|
| 1171 |
+
placeholder="SLURM_JOB_ID will be used by default",
|
| 1172 |
+
value="",
|
| 1173 |
+
interactive=True,
|
| 1174 |
+
visible=True,
|
| 1175 |
+
)
|
| 1176 |
with gr.Row():
|
| 1177 |
with gr.Column(scale=2):
|
| 1178 |
toksuite_selector = gr.CheckboxGroup(
|
| 1179 |
label="Select toksuite models",
|
| 1180 |
choices=TOKSUITE_MODELS,
|
| 1181 |
+
value=TOKSUITE_MODELS,
|
| 1182 |
interactive=True,
|
| 1183 |
info="These models share the same initialization and training source but differ only in their tokenizers. See [r-three/toksuite](https://huggingface.co/collections/r-three/toksuite-68ae7490c151341d78423295) for details.",
|
| 1184 |
)
|
|
|
|
| 1291 |
delimiter_selector,
|
| 1292 |
save_summary_checkbox,
|
| 1293 |
normalization_method,
|
| 1294 |
+
prefix,
|
| 1295 |
],
|
| 1296 |
outputs=[
|
| 1297 |
summary_output,
|