gsaltintas commited on
Commit
6440944
·
verified ·
1 Parent(s): 5331a2b

Upload fineweb2_hq/flexitok--bpe_swe_Latn_8000_overlap.json with huggingface_hub

Browse files
fineweb2_hq/flexitok--bpe_swe_Latn_8000_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"1": {"ratio_to_total_tokens": 0.415375, "expected_training_ratio_in_superset": 0.019779761904761904, "num_tokens": 3323}, "2": {"ratio_to_total_tokens": 0.187625, "expected_training_ratio_in_superset": 0.017869047619047618, "num_tokens": 1501}, "3": {"ratio_to_total_tokens": 0.05775, "expected_training_ratio_in_superset": 0.00825, "num_tokens": 462}, "4": {"ratio_to_total_tokens": 0.03875, "expected_training_ratio_in_superset": 0.00738095238095238, "num_tokens": 310}, "5": {"ratio_to_total_tokens": 0.02875, "expected_training_ratio_in_superset": 0.006845238095238095, "num_tokens": 230}, "6": {"ratio_to_total_tokens": 0.024125, "expected_training_ratio_in_superset": 0.006892857142857142, "num_tokens": 193}, "7": {"ratio_to_total_tokens": 0.02075, "expected_training_ratio_in_superset": 0.0069166666666666664, "num_tokens": 166}, "8": {"ratio_to_total_tokens": 0.02025, "expected_training_ratio_in_superset": 0.0077142857142857135, "num_tokens": 162}, "9": {"ratio_to_total_tokens": 0.019, "expected_training_ratio_in_superset": 0.008142857142857143, "num_tokens": 152}, "10": {"ratio_to_total_tokens": 0.0145, "expected_training_ratio_in_superset": 0.006904761904761904, "num_tokens": 116}, "11": {"ratio_to_total_tokens": 0.01675, "expected_training_ratio_in_superset": 0.008773809523809524, "num_tokens": 134}, "12": {"ratio_to_total_tokens": 0.015375, "expected_training_ratio_in_superset": 0.008785714285714284, "num_tokens": 123}, "13": {"ratio_to_total_tokens": 0.012625, "expected_training_ratio_in_superset": 0.00781547619047619, "num_tokens": 101}, "14": {"ratio_to_total_tokens": 0.01175, "expected_training_ratio_in_superset": 0.007833333333333333, "num_tokens": 94}, "15": {"ratio_to_total_tokens": 0.011875, "expected_training_ratio_in_superset": 0.008482142857142858, "num_tokens": 95}, "16": {"ratio_to_total_tokens": 0.01225, "expected_training_ratio_in_superset": 0.009333333333333332, "num_tokens": 98}, "17": {"ratio_to_total_tokens": 0.011625, "expected_training_ratio_in_superset": 0.009410714285714286, "num_tokens": 93}, "18": {"ratio_to_total_tokens": 0.011625, "expected_training_ratio_in_superset": 0.009964285714285714, "num_tokens": 93}, "19": {"ratio_to_total_tokens": 0.00775, "expected_training_ratio_in_superset": 0.007011904761904762, "num_tokens": 62}, "20": {"ratio_to_total_tokens": 0.00725, "expected_training_ratio_in_superset": 0.006904761904761904, "num_tokens": 58}, "21": {"ratio_to_total_tokens": 0.05425, "expected_training_ratio_in_superset": 0.05425, "num_tokens": 434}, "total_training_compared_to_full_model": 0.23526190476190476}