Gül Sena Altıntaş committed on
Commit
5d0049d
·
1 Parent(s): 9cada5f

Updated hf paths for toksuite models

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -111,6 +111,7 @@ TOKSUITE_MODELS = [
111
  "google-byt5-small",
112
  "google-bert-bert-base-multilingual-cased",
113
  "Qwen-Qwen3-8B",
 
114
  ]
115
  # Global cache for loaded models
116
  model_cache = dict()
@@ -331,7 +332,7 @@ def calculate_choice_likelihood(
331
  if normalization_method == "token-length":
332
  normalization_term = answer_len
333
  elif normalization_method == "byte-length":
334
- decoded_text = tokenizer.decode(target_ids[0]).strip()
335
  byte_len = len(decoded_text.encode("utf-8"))
336
  normalization_term = byte_len
337
  total_log_prob /= normalization_term
@@ -439,10 +440,10 @@ def run_evaluation(
439
  toksuite_selector,
440
  custom_models_text="",
441
  delimiter: str = "\t",
442
- progress=gr.Progress(),
443
  save_summary=False,
444
  normalization_method: str = "token-length",
445
  prefix: str = "",
 
446
  ):
447
  import gc
448
 
@@ -476,7 +477,7 @@ def run_evaluation(
476
  # Add predefined models
477
  all_models.extend(selected_predefined)
478
  all_models.extend(
479
- [f"r-three/supertoken_models-llama_{model}" for model in toksuite_selector]
480
  )
481
  all_models.extend(custom_models)
482
 
@@ -1192,8 +1193,8 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
1192
  lines=1,
1193
  )
1194
  with gr.Column(scale=1):
1195
- save_summary_checkbox = False
1196
- prefix = ""
1197
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
1198
  if slurm_id:
1199
  save_summary_checkbox = gr.Checkbox(
@@ -1322,7 +1323,6 @@ bigscience/bloom-560m""",
1322
  fn=run_evaluation,
1323
  inputs=[
1324
  dataset_input,
1325
- # predefined_selector,
1326
  industry_selector,
1327
  toksuite_selector,
1328
  custom_models_input,
@@ -1365,4 +1365,9 @@ bigscience/bloom-560m""",
1365
  """)
1366
 
1367
  if __name__ == "__main__":
1368
- demo.launch(share=True)
 
 
 
 
 
 
111
  "google-byt5-small",
112
  "google-bert-bert-base-multilingual-cased",
113
  "Qwen-Qwen3-8B",
114
+ "tiktoken-gpt-4o",
115
  ]
116
  # Global cache for loaded models
117
  model_cache = dict()
 
332
  if normalization_method == "token-length":
333
  normalization_term = answer_len
334
  elif normalization_method == "byte-length":
335
+ decoded_text = tokenizer.decode(target_ids).strip()
336
  byte_len = len(decoded_text.encode("utf-8"))
337
  normalization_term = byte_len
338
  total_log_prob /= normalization_term
 
440
  toksuite_selector,
441
  custom_models_text="",
442
  delimiter: str = "\t",
 
443
  save_summary=False,
444
  normalization_method: str = "token-length",
445
  prefix: str = "",
446
+ progress=gr.Progress(),
447
  ):
448
  import gc
449
 
 
477
  # Add predefined models
478
  all_models.extend(selected_predefined)
479
  all_models.extend(
480
+ [f"toksuite/{model}" for model in toksuite_selector]
481
  )
482
  all_models.extend(custom_models)
483
 
 
1193
  lines=1,
1194
  )
1195
  with gr.Column(scale=1):
1196
+ save_summary_checkbox = gr.Checkbox(value=True)
1197
+ prefix = gr.Textbox(visible=False, value="")
1198
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
1199
  if slurm_id:
1200
  save_summary_checkbox = gr.Checkbox(
 
1323
  fn=run_evaluation,
1324
  inputs=[
1325
  dataset_input,
 
1326
  industry_selector,
1327
  toksuite_selector,
1328
  custom_models_input,
 
1365
  """)
1366
 
1367
  if __name__ == "__main__":
1368
+ import argparse
1369
+
1370
+ argparser = argparse.ArgumentParser()
1371
+ argparser.add_argument("--share", action="store_true", default=True)
1372
+ args = argparser.parse_args()
1373
+ demo.launch(share=args.share)