gsaltintas commited on
Commit
fbdd891
·
verified ·
1 Parent(s): 76b2898

Upload mod_tokenizers/flexitok--mod-tokenizers-rtl_5digit_overlap.json with huggingface_hub

Browse files
mod_tokenizers/flexitok--mod-tokenizers-rtl_5digit_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"2": {"ratio_to_total_tokens": 0.8999370044096914, "expected_training_ratio_in_superset": 0.1999860009799314, "num_tokens": 90000}, "4": {"ratio_to_total_tokens": 0.08999370044096913, "expected_training_ratio_in_superset": 0.03999720019598628, "num_tokens": 9000}, "6": {"ratio_to_total_tokens": 0.008999370044096913, "expected_training_ratio_in_superset": 0.005999580029397942, "num_tokens": 900}, "8": {"ratio_to_total_tokens": 0.0008999370044096913, "expected_training_ratio_in_superset": 0.0007999440039197256, "num_tokens": 90}, "9": {"ratio_to_total_tokens": 0.0001699881008329417, "expected_training_ratio_in_superset": 0.0001699881008329417, "num_tokens": 17}, "total_training_compared_to_full_model": 0.2469527133100683}