Gül Sena Altıntaş committed on
Commit
b3de8c3
·
1 Parent(s): 431425d

- Save with prefix

Browse files

- Two new models added

Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +17 -2
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ summaries/
app.py CHANGED
@@ -109,6 +109,8 @@ TOKSUITE_MODELS = [
109
  "mistralai-tekken",
110
  "tokenmonster-englishcode-32000-consistent-v1",
111
  "google-byt5-small",
 
 
112
  ]
113
  # Global cache for loaded models
114
  model_cache = dict()
@@ -440,6 +442,7 @@ def run_evaluation(
440
  progress=gr.Progress(),
441
  save_summary=False,
442
  normalization_method: str = "token-length",
 
443
  ):
444
  import gc
445
 
@@ -558,7 +561,10 @@ def run_evaluation(
558
  csv_summary = generate_csv_summary(questions, results, summary_stats)
559
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
560
  if save_summary and slurm_id:
561
- with open(f"{slurm_id}_summary_{time()}.md", "w") as f:
 
 
 
562
  f.write(markdown_summary)
563
 
564
  return (
@@ -1151,6 +1157,7 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
1151
  )
1152
  with gr.Column(scale=1):
1153
  save_summary_checkbox = False
 
1154
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
1155
  if slurm_id:
1156
  save_summary_checkbox = gr.Checkbox(
@@ -1159,12 +1166,19 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
1159
  value=False,
1160
  # info="If checked, saves a markdown summary file with SLURM_JOB_ID prefix",
1161
  )
 
 
 
 
 
 
 
1162
  with gr.Row():
1163
  with gr.Column(scale=2):
1164
  toksuite_selector = gr.CheckboxGroup(
1165
  label="Select toksuite models",
1166
  choices=TOKSUITE_MODELS,
1167
- value=TOKSUITE_MODELS[-3:],
1168
  interactive=True,
1169
  info="These models share the same initialization and training source but differ only in their tokenizers. See [r-three/toksuite](https://huggingface.co/collections/r-three/toksuite-68ae7490c151341d78423295) for details.",
1170
  )
@@ -1277,6 +1291,7 @@ bigscience/bloom-560m""",
1277
  delimiter_selector,
1278
  save_summary_checkbox,
1279
  normalization_method,
 
1280
  ],
1281
  outputs=[
1282
  summary_output,
 
109
  "mistralai-tekken",
110
  "tokenmonster-englishcode-32000-consistent-v1",
111
  "google-byt5-small",
112
+ "google-bert-bert-base-multilingual-cased",
113
+ "Qwen-Qwen3-8B",
114
  ]
115
  # Global cache for loaded models
116
  model_cache = dict()
 
442
  progress=gr.Progress(),
443
  save_summary=False,
444
  normalization_method: str = "token-length",
445
+ prefix: str = "",
446
  ):
447
  import gc
448
 
 
561
  csv_summary = generate_csv_summary(questions, results, summary_stats)
562
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
563
  if save_summary and slurm_id:
564
+ file_name = f"summaries/{slurm_id}_summary_{time()}.md"
565
+ if prefix:
566
+ file_name = f"summaries/{slurm_id}_{prefix}_summary_{time()}.md"
567
+ with open(file_name, "w") as f:
568
  f.write(markdown_summary)
569
 
570
  return (
 
1157
  )
1158
  with gr.Column(scale=1):
1159
  save_summary_checkbox = False
1160
+ prefix = ""
1161
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
1162
  if slurm_id:
1163
  save_summary_checkbox = gr.Checkbox(
 
1166
  value=False,
1167
  # info="If checked, saves a markdown summary file with SLURM_JOB_ID prefix",
1168
  )
1169
+ prefix = gr.Textbox(
1170
+ label="Filename Prefix",
1171
+ placeholder="SLURM_JOB_ID will be used by default",
1172
+ value="",
1173
+ interactive=True,
1174
+ visible=True,
1175
+ )
1176
  with gr.Row():
1177
  with gr.Column(scale=2):
1178
  toksuite_selector = gr.CheckboxGroup(
1179
  label="Select toksuite models",
1180
  choices=TOKSUITE_MODELS,
1181
+ value=TOKSUITE_MODELS,
1182
  interactive=True,
1183
  info="These models share the same initialization and training source but differ only in their tokenizers. See [r-three/toksuite](https://huggingface.co/collections/r-three/toksuite-68ae7490c151341d78423295) for details.",
1184
  )
 
1291
  delimiter_selector,
1292
  save_summary_checkbox,
1293
  normalization_method,
1294
+ prefix,
1295
  ],
1296
  outputs=[
1297
  summary_output,