gsaltintas committed on
Commit
80d6554
·
verified ·
1 Parent(s): 55b5cf4

Upload mod_tokenizers/flexitok--mod-tokenizers-ltr_4digit_overlap.json with huggingface_hub

Browse files
mod_tokenizers/flexitok--mod-tokenizers-ltr_4digit_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"4": {"ratio_to_total_tokens": 0.899370440691516, "expected_training_ratio_in_superset": 0.399720195862896, "num_tokens": 9000}, "6": {"ratio_to_total_tokens": 0.0899370440691516, "expected_training_ratio_in_superset": 0.0599580293794344, "num_tokens": 900}, "8": {"ratio_to_total_tokens": 0.00899370440691516, "expected_training_ratio_in_superset": 0.00799440391725792, "num_tokens": 90}, "9": {"ratio_to_total_tokens": 0.0016988108324173079, "expected_training_ratio_in_superset": 0.0016988108324173079, "num_tokens": 17}, "total_training_compared_to_full_model": 0.4693714399920056}