k1h0's picture
Upload folder using huggingface_hub
a63fab9 verified
{"current_steps": 1, "total_steps": 47, "loss": 1.099, "lr": 4.9944171965578836e-05, "epoch": 0.02099737532808399, "percentage": 2.13, "elapsed_time": "0:03:01", "remaining_time": "2:18:47", "throughput": 11584.36, "total_tokens": 2097152}
{"current_steps": 2, "total_steps": 47, "loss": 1.0954, "lr": 4.97769372038695e-05, "epoch": 0.04199475065616798, "percentage": 4.26, "elapsed_time": "0:05:54", "remaining_time": "2:13:07", "throughput": 11815.38, "total_tokens": 4194304}
{"current_steps": 3, "total_steps": 47, "loss": 1.0273, "lr": 4.9499042625914674e-05, "epoch": 0.06299212598425197, "percentage": 6.38, "elapsed_time": "0:08:49", "remaining_time": "2:09:21", "throughput": 11889.36, "total_tokens": 6291456}
{"current_steps": 4, "total_steps": 47, "loss": 0.9301, "lr": 4.911172937635942e-05, "epoch": 0.08398950131233596, "percentage": 8.51, "elapsed_time": "0:11:43", "remaining_time": "2:05:59", "throughput": 11929.11, "total_tokens": 8388608}
{"current_steps": 5, "total_steps": 47, "loss": 0.8696, "lr": 4.861672729019797e-05, "epoch": 0.10498687664041995, "percentage": 10.64, "elapsed_time": "0:14:37", "remaining_time": "2:02:50", "throughput": 11950.39, "total_tokens": 10485760}
{"current_steps": 6, "total_steps": 47, "loss": 0.8379, "lr": 4.801624716691072e-05, "epoch": 0.12598425196850394, "percentage": 12.77, "elapsed_time": "0:17:31", "remaining_time": "1:59:47", "throughput": 11963.64, "total_tokens": 12582912}
{"current_steps": 7, "total_steps": 47, "loss": 0.8087, "lr": 4.731297089649703e-05, "epoch": 0.14698162729658792, "percentage": 14.89, "elapsed_time": "0:20:25", "remaining_time": "1:56:45", "throughput": 11974.16, "total_tokens": 14680064}
{"current_steps": 8, "total_steps": 47, "loss": 0.8071, "lr": 4.651003948150349e-05, "epoch": 0.1679790026246719, "percentage": 17.02, "elapsed_time": "0:23:20", "remaining_time": "1:53:45", "throughput": 11982.48, "total_tokens": 16777216}
{"current_steps": 9, "total_steps": 47, "loss": 0.7521, "lr": 4.561103900854401e-05, "epoch": 0.1889763779527559, "percentage": 19.15, "elapsed_time": "0:26:14", "remaining_time": "1:50:47", "throughput": 11987.35, "total_tokens": 18874368}
{"current_steps": 10, "total_steps": 47, "loss": 0.7666, "lr": 4.4619984631966524e-05, "epoch": 0.2099737532808399, "percentage": 21.28, "elapsed_time": "0:29:09", "remaining_time": "1:47:51", "throughput": 11990.25, "total_tokens": 20971520}
{"current_steps": 11, "total_steps": 47, "loss": 0.7226, "lr": 4.354130264119894e-05, "epoch": 0.23097112860892388, "percentage": 23.4, "elapsed_time": "0:32:03", "remaining_time": "1:44:54", "throughput": 11994.52, "total_tokens": 23068672}
{"current_steps": 12, "total_steps": 47, "loss": 0.719, "lr": 4.2379810691866064e-05, "epoch": 0.25196850393700787, "percentage": 25.53, "elapsed_time": "0:34:57", "remaining_time": "1:41:57", "throughput": 11998.58, "total_tokens": 25165824}
{"current_steps": 13, "total_steps": 47, "loss": 0.7129, "lr": 4.114069628897006e-05, "epoch": 0.27296587926509186, "percentage": 27.66, "elapsed_time": "0:37:51", "remaining_time": "1:39:01", "throughput": 12001.09, "total_tokens": 27262976}
{"current_steps": 14, "total_steps": 47, "loss": 0.7029, "lr": 3.982949361823388e-05, "epoch": 0.29396325459317585, "percentage": 29.79, "elapsed_time": "0:40:45", "remaining_time": "1:36:05", "throughput": 12003.39, "total_tokens": 29360128}
{"current_steps": 15, "total_steps": 47, "loss": 0.693, "lr": 3.845205882908432e-05, "epoch": 0.31496062992125984, "percentage": 31.91, "elapsed_time": "0:43:40", "remaining_time": "1:33:09", "throughput": 12005.3, "total_tokens": 31457280}
{"current_steps": 16, "total_steps": 47, "loss": 0.689, "lr": 3.7014543879667094e-05, "epoch": 0.3359580052493438, "percentage": 34.04, "elapsed_time": "0:46:34", "remaining_time": "1:30:14", "throughput": 12006.81, "total_tokens": 33554432}
{"current_steps": 17, "total_steps": 47, "loss": 0.6898, "lr": 3.552336906070838e-05, "epoch": 0.3569553805774278, "percentage": 36.17, "elapsed_time": "0:49:28", "remaining_time": "1:27:18", "throughput": 12008.94, "total_tokens": 35651584}
{"current_steps": 18, "total_steps": 47, "loss": 0.6596, "lr": 3.398519432093782e-05, "epoch": 0.3779527559055118, "percentage": 38.3, "elapsed_time": "0:52:22", "remaining_time": "1:24:23", "throughput": 12010.5, "total_tokens": 37748736}
{"current_steps": 19, "total_steps": 47, "loss": 0.6796, "lr": 3.2406889522140856e-05, "epoch": 0.3989501312335958, "percentage": 40.43, "elapsed_time": "0:55:16", "remaining_time": "1:21:28", "throughput": 12012.77, "total_tokens": 39845888}
{"current_steps": 20, "total_steps": 47, "loss": 0.6595, "lr": 3.079550375668821e-05, "epoch": 0.4199475065616798, "percentage": 42.55, "elapsed_time": "0:58:10", "remaining_time": "1:18:32", "throughput": 12016.11, "total_tokens": 41943040}
{"current_steps": 21, "total_steps": 47, "loss": 0.655, "lr": 2.9158233864578254e-05, "epoch": 0.4409448818897638, "percentage": 44.68, "elapsed_time": "1:01:03", "remaining_time": "1:15:35", "throughput": 12021.16, "total_tokens": 44040192}
{"current_steps": 22, "total_steps": 47, "loss": 0.6566, "lr": 2.7502392290602463e-05, "epoch": 0.46194225721784776, "percentage": 46.81, "elapsed_time": "1:03:56", "remaining_time": "1:12:39", "throughput": 12025.76, "total_tokens": 46137344}
{"current_steps": 23, "total_steps": 47, "loss": 0.6483, "lr": 2.5835374425191866e-05, "epoch": 0.48293963254593175, "percentage": 48.94, "elapsed_time": "1:06:49", "remaining_time": "1:09:44", "throughput": 12029.04, "total_tokens": 48234496}
{"current_steps": 24, "total_steps": 47, "loss": 0.6319, "lr": 2.4164625574808146e-05, "epoch": 0.5039370078740157, "percentage": 51.06, "elapsed_time": "1:09:42", "remaining_time": "1:06:48", "throughput": 12032.55, "total_tokens": 50331648}
{"current_steps": 25, "total_steps": 47, "loss": 0.6479, "lr": 2.2497607709397543e-05, "epoch": 0.5249343832020997, "percentage": 53.19, "elapsed_time": "1:12:36", "remaining_time": "1:03:53", "throughput": 12035.5, "total_tokens": 52428800}
{"current_steps": 26, "total_steps": 47, "loss": 0.6253, "lr": 2.0841766135421752e-05, "epoch": 0.5459317585301837, "percentage": 55.32, "elapsed_time": "1:15:29", "remaining_time": "1:00:58", "throughput": 12037.82, "total_tokens": 54525952}
{"current_steps": 27, "total_steps": 47, "loss": 0.6248, "lr": 1.920449624331179e-05, "epoch": 0.5669291338582677, "percentage": 57.45, "elapsed_time": "1:18:23", "remaining_time": "0:58:03", "throughput": 12039.18, "total_tokens": 56623104}
{"current_steps": 28, "total_steps": 47, "loss": 0.6237, "lr": 1.7593110477859153e-05, "epoch": 0.5879265091863517, "percentage": 59.57, "elapsed_time": "1:21:16", "remaining_time": "0:55:09", "throughput": 12040.88, "total_tokens": 58720256}
{"current_steps": 29, "total_steps": 47, "loss": 0.63, "lr": 1.6014805679062185e-05, "epoch": 0.6089238845144357, "percentage": 61.7, "elapsed_time": "1:24:10", "remaining_time": "0:52:14", "throughput": 12042.42, "total_tokens": 60817408}
{"current_steps": 30, "total_steps": 47, "loss": 0.6241, "lr": 1.447663093929163e-05, "epoch": 0.6299212598425197, "percentage": 63.83, "elapsed_time": "1:27:03", "remaining_time": "0:49:20", "throughput": 12043.81, "total_tokens": 62914560}
{"current_steps": 31, "total_steps": 47, "loss": 0.6062, "lr": 1.2985456120332906e-05, "epoch": 0.6509186351706037, "percentage": 65.96, "elapsed_time": "1:29:57", "remaining_time": "0:46:25", "throughput": 12044.95, "total_tokens": 65011712}
{"current_steps": 32, "total_steps": 47, "loss": 0.6356, "lr": 1.1547941170915686e-05, "epoch": 0.6719160104986877, "percentage": 68.09, "elapsed_time": "1:32:51", "remaining_time": "0:43:31", "throughput": 12046.11, "total_tokens": 67108864}
{"current_steps": 33, "total_steps": 47, "loss": 0.6151, "lr": 1.0170506381766121e-05, "epoch": 0.6929133858267716, "percentage": 70.21, "elapsed_time": "1:35:44", "remaining_time": "0:40:37", "throughput": 12046.85, "total_tokens": 69206016}
{"current_steps": 34, "total_steps": 47, "loss": 0.6198, "lr": 8.85930371102994e-06, "epoch": 0.7139107611548556, "percentage": 72.34, "elapsed_time": "1:38:38", "remaining_time": "0:37:43", "throughput": 12047.18, "total_tokens": 71303168}
{"current_steps": 35, "total_steps": 47, "loss": 0.5952, "lr": 7.620189308133943e-06, "epoch": 0.7349081364829396, "percentage": 74.47, "elapsed_time": "1:41:32", "remaining_time": "0:34:48", "throughput": 12047.45, "total_tokens": 73400320}
{"current_steps": 36, "total_steps": 47, "loss": 0.6126, "lr": 6.458697358801061e-06, "epoch": 0.7559055118110236, "percentage": 76.6, "elapsed_time": "1:44:26", "remaining_time": "0:31:54", "throughput": 12047.72, "total_tokens": 75497472}
{"current_steps": 37, "total_steps": 47, "loss": 0.637, "lr": 5.380015368033476e-06, "epoch": 0.7769028871391076, "percentage": 78.72, "elapsed_time": "1:47:20", "remaining_time": "0:29:00", "throughput": 12047.79, "total_tokens": 77594624}
{"current_steps": 38, "total_steps": 47, "loss": 0.6432, "lr": 4.388960991455998e-06, "epoch": 0.7979002624671916, "percentage": 80.85, "elapsed_time": "1:50:15", "remaining_time": "0:26:06", "throughput": 12047.09, "total_tokens": 79691776}
{"current_steps": 39, "total_steps": 47, "loss": 0.6329, "lr": 3.489960518496521e-06, "epoch": 0.8188976377952756, "percentage": 82.98, "elapsed_time": "1:53:08", "remaining_time": "0:23:12", "throughput": 12047.43, "total_tokens": 81788928}
{"current_steps": 40, "total_steps": 47, "loss": 0.629, "lr": 2.687029103502972e-06, "epoch": 0.8398950131233596, "percentage": 85.11, "elapsed_time": "1:56:03", "remaining_time": "0:20:18", "throughput": 12047.22, "total_tokens": 83886080}
{"current_steps": 41, "total_steps": 47, "loss": 0.6277, "lr": 1.983752833089278e-06, "epoch": 0.8608923884514436, "percentage": 87.23, "elapsed_time": "1:58:57", "remaining_time": "0:17:24", "throughput": 12046.86, "total_tokens": 85983232}
{"current_steps": 42, "total_steps": 47, "loss": 0.6192, "lr": 1.3832727098020332e-06, "epoch": 0.8818897637795275, "percentage": 89.36, "elapsed_time": "2:01:52", "remaining_time": "0:14:30", "throughput": 12045.85, "total_tokens": 88080384}
{"current_steps": 43, "total_steps": 47, "loss": 0.6203, "lr": 8.882706236405886e-07, "epoch": 0.9028871391076115, "percentage": 91.49, "elapsed_time": "2:04:47", "remaining_time": "0:11:36", "throughput": 12043.63, "total_tokens": 90177536}
{"current_steps": 44, "total_steps": 47, "loss": 0.6155, "lr": 5.009573740853313e-07, "epoch": 0.9238845144356955, "percentage": 93.62, "elapsed_time": "2:07:37", "remaining_time": "0:08:42", "throughput": 12050.55, "total_tokens": 92274688}
{"current_steps": 45, "total_steps": 47, "loss": 0.6273, "lr": 2.230627961304993e-07, "epoch": 0.9448818897637795, "percentage": 95.74, "elapsed_time": "2:10:26", "remaining_time": "0:05:47", "throughput": 12057.85, "total_tokens": 94371840}
{"current_steps": 46, "total_steps": 47, "loss": 0.624, "lr": 5.5828034421170907e-08, "epoch": 0.9658792650918635, "percentage": 97.87, "elapsed_time": "2:13:15", "remaining_time": "0:02:53", "throughput": 12064.68, "total_tokens": 96468992}
{"current_steps": 47, "total_steps": 47, "loss": 0.6356, "lr": 0.0, "epoch": 0.9868766404199475, "percentage": 100.0, "elapsed_time": "2:16:05", "remaining_time": "0:00:00", "throughput": 12071.55, "total_tokens": 98566144}
{"current_steps": 47, "total_steps": 47, "epoch": 0.9868766404199475, "percentage": 100.0, "elapsed_time": "2:16:29", "remaining_time": "0:00:00", "throughput": 12035.81, "total_tokens": 98566144}