gsaltintas commited on
Commit
c8982f1
·
verified ·
1 Parent(s): 1934bfb

Upload fineweb2_hq/flexitok--bpe_jpn_Jpan_8000_overlap.json with huggingface_hub

Browse files
fineweb2_hq/flexitok--bpe_jpn_Jpan_8000_overlap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"1": {"ratio_to_total_tokens": 0.266375, "expected_training_ratio_in_superset": 0.012684523809523809, "num_tokens": 2131}, "2": {"ratio_to_total_tokens": 0.414125, "expected_training_ratio_in_superset": 0.03944047619047619, "num_tokens": 3313}, "3": {"ratio_to_total_tokens": 0.042125, "expected_training_ratio_in_superset": 0.0060178571428571425, "num_tokens": 337}, "4": {"ratio_to_total_tokens": 0.0295, "expected_training_ratio_in_superset": 0.005619047619047619, "num_tokens": 236}, "5": {"ratio_to_total_tokens": 0.020625, "expected_training_ratio_in_superset": 0.004910714285714286, "num_tokens": 165}, "6": {"ratio_to_total_tokens": 0.01475, "expected_training_ratio_in_superset": 0.004214285714285715, "num_tokens": 118}, "7": {"ratio_to_total_tokens": 0.015375, "expected_training_ratio_in_superset": 0.005125, "num_tokens": 123}, "8": {"ratio_to_total_tokens": 0.01375, "expected_training_ratio_in_superset": 0.005238095238095238, "num_tokens": 110}, "9": {"ratio_to_total_tokens": 0.012125, "expected_training_ratio_in_superset": 0.0051964285714285715, "num_tokens": 97}, "10": {"ratio_to_total_tokens": 0.00875, "expected_training_ratio_in_superset": 0.004166666666666666, "num_tokens": 70}, "11": {"ratio_to_total_tokens": 0.012375, "expected_training_ratio_in_superset": 0.006482142857142857, "num_tokens": 99}, "12": {"ratio_to_total_tokens": 0.01325, "expected_training_ratio_in_superset": 0.007571428571428571, "num_tokens": 106}, "13": {"ratio_to_total_tokens": 0.00975, "expected_training_ratio_in_superset": 0.006035714285714286, "num_tokens": 78}, "14": {"ratio_to_total_tokens": 0.0105, "expected_training_ratio_in_superset": 0.007, "num_tokens": 84}, "15": {"ratio_to_total_tokens": 0.010875, "expected_training_ratio_in_superset": 0.007767857142857143, "num_tokens": 87}, "16": {"ratio_to_total_tokens": 0.01275, "expected_training_ratio_in_superset": 0.009714285714285713, "num_tokens": 102}, "17": {"ratio_to_total_tokens": 0.012, "expected_training_ratio_in_superset": 0.009714285714285715, "num_tokens": 96}, "18": {"ratio_to_total_tokens": 0.0115, "expected_training_ratio_in_superset": 0.009857142857142856, "num_tokens": 92}, "19": {"ratio_to_total_tokens": 0.008, "expected_training_ratio_in_superset": 0.007238095238095238, "num_tokens": 64}, "20": {"ratio_to_total_tokens": 0.00725, "expected_training_ratio_in_superset": 0.006904761904761904, "num_tokens": 58}, "21": {"ratio_to_total_tokens": 0.05425, "expected_training_ratio_in_superset": 0.05425, "num_tokens": 434}, "total_training_compared_to_full_model": 0.22514880952380953}