gsaltintas commited on
Commit
afd355b
·
verified ·
1 Parent(s): d446316

Upload fineweb2_hq/flexitok--bpe_fra_Latn_8000_overlap.json with huggingface_hub

Browse files
fineweb2_hq/flexitok--bpe_fra_Latn_8000_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"1": {"ratio_to_total_tokens": 0.510625, "expected_training_ratio_in_superset": 0.02431547619047619, "num_tokens": 4085}, "2": {"ratio_to_total_tokens": 0.105, "expected_training_ratio_in_superset": 0.01, "num_tokens": 840}, "3": {"ratio_to_total_tokens": 0.054375, "expected_training_ratio_in_superset": 0.007767857142857142, "num_tokens": 435}, "4": {"ratio_to_total_tokens": 0.038625, "expected_training_ratio_in_superset": 0.007357142857142856, "num_tokens": 309}, "5": {"ratio_to_total_tokens": 0.0305, "expected_training_ratio_in_superset": 0.007261904761904762, "num_tokens": 244}, "6": {"ratio_to_total_tokens": 0.02275, "expected_training_ratio_in_superset": 0.0065, "num_tokens": 182}, "7": {"ratio_to_total_tokens": 0.017375, "expected_training_ratio_in_superset": 0.005791666666666666, "num_tokens": 139}, "8": {"ratio_to_total_tokens": 0.01775, "expected_training_ratio_in_superset": 0.0067619047619047615, "num_tokens": 142}, "9": {"ratio_to_total_tokens": 0.016625, "expected_training_ratio_in_superset": 0.007125, "num_tokens": 133}, "10": {"ratio_to_total_tokens": 0.01325, "expected_training_ratio_in_superset": 0.006309523809523809, "num_tokens": 106}, "11": {"ratio_to_total_tokens": 0.017, "expected_training_ratio_in_superset": 0.008904761904761905, "num_tokens": 136}, "12": {"ratio_to_total_tokens": 0.014875, "expected_training_ratio_in_superset": 0.0085, "num_tokens": 119}, "13": {"ratio_to_total_tokens": 0.012125, "expected_training_ratio_in_superset": 0.007505952380952381, "num_tokens": 97}, "14": {"ratio_to_total_tokens": 0.013, "expected_training_ratio_in_superset": 0.008666666666666666, "num_tokens": 104}, "15": {"ratio_to_total_tokens": 0.01175, "expected_training_ratio_in_superset": 0.008392857142857143, "num_tokens": 94}, "16": {"ratio_to_total_tokens": 0.0115, "expected_training_ratio_in_superset": 0.00876190476190476, "num_tokens": 92}, "17": {"ratio_to_total_tokens": 0.01175, "expected_training_ratio_in_superset": 0.009511904761904763, "num_tokens": 94}, "18": {"ratio_to_total_tokens": 0.011625, "expected_training_ratio_in_superset": 0.009964285714285714, "num_tokens": 93}, "19": {"ratio_to_total_tokens": 0.008, "expected_training_ratio_in_superset": 0.007238095238095238, "num_tokens": 64}, "20": {"ratio_to_total_tokens": 0.00725, "expected_training_ratio_in_superset": 0.006904761904761904, "num_tokens": 58}, "21": {"ratio_to_total_tokens": 0.05425, "expected_training_ratio_in_superset": 0.05425, "num_tokens": 434}, "total_training_compared_to_full_model": 0.22779166666666667}