Upload mod_tokenizers/flexitok--mod-tokenizers-ltr_4digit_overlap.json with huggingface_hub
Browse files
mod_tokenizers/flexitok--mod-tokenizers-ltr_4digit_overlap.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"4": {"ratio_to_total_tokens": 0.899370440691516, "expected_training_ratio_in_superset": 0.399720195862896, "num_tokens": 9000}, "6": {"ratio_to_total_tokens": 0.0899370440691516, "expected_training_ratio_in_superset": 0.0599580293794344, "num_tokens": 900}, "8": {"ratio_to_total_tokens": 0.00899370440691516, "expected_training_ratio_in_superset": 0.00799440391725792, "num_tokens": 90}, "9": {"ratio_to_total_tokens": 0.0016988108324173079, "expected_training_ratio_in_superset": 0.0016988108324173079, "num_tokens": 17}, "total_training_compared_to_full_model": 0.4693714399920056}
|