gsaltintas commited on
Commit
a733f25
·
verified ·
1 Parent(s): 67618c9

Upload script_1/flexitok--bpe_script_Germ_32000_overlap.json with huggingface_hub

Browse files
script_1/flexitok--bpe_script_Germ_32000_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"1": {"ratio_to_total_tokens": 0.6438764643237487, "expected_training_ratio_in_superset": 0.058534224029431696, "num_tokens": 21161}, "2": {"ratio_to_total_tokens": 0.09773315076829454, "expected_training_ratio_in_superset": 0.017769663776053553, "num_tokens": 3212}, "3": {"ratio_to_total_tokens": 0.08516659059790051, "expected_training_ratio_in_superset": 0.02322725198124559, "num_tokens": 2799}, "4": {"ratio_to_total_tokens": 0.05020538566864446, "expected_training_ratio_in_superset": 0.018256503879507075, "num_tokens": 1650}, "5": {"ratio_to_total_tokens": 0.02951468127186977, "expected_training_ratio_in_superset": 0.013415764214486259, "num_tokens": 970}, "6": {"ratio_to_total_tokens": 0.017404533698463412, "expected_training_ratio_in_superset": 0.009493382017343678, "num_tokens": 572}, "7": {"ratio_to_total_tokens": 0.01323596531264263, "expected_training_ratio_in_superset": 0.008422887017136218, "num_tokens": 435}, "8": {"ratio_to_total_tokens": 0.007454739084132056, "expected_training_ratio_in_superset": 0.005421628424823313, "num_tokens": 245}, "9": {"ratio_to_total_tokens": 0.0033165982047771185, "expected_training_ratio_in_superset": 0.002713580349363097, "num_tokens": 109}, "10": {"ratio_to_total_tokens": 0.003468735737106344, "expected_training_ratio_in_superset": 0.0031533961246421305, "num_tokens": 114}, "11": {"ratio_to_total_tokens": 0.04862315533242051, "expected_training_ratio_in_superset": 0.04862315533242051, "num_tokens": 1598}, "total_training_compared_to_full_model": 0.20903143714645311}