rbelanec commited on
Commit
dbf2b49
·
verified ·
1 Parent(s): 4c8e9c8

Training in progress, step 39400

Browse files
Files changed (1) hide show
  1. trainer_log.jsonl +41 -0
trainer_log.jsonl CHANGED
@@ -8034,3 +8034,44 @@
8034
  {"current_steps": 39195, "total_steps": 40000, "loss": 3.8137, "lr": 5.0074292995316854e-08, "epoch": 1.2545611676589208, "percentage": 97.99, "elapsed_time": "1 day, 9:19:49", "remaining_time": "0:41:04", "throughput": 497.13, "total_tokens": 59650432}
8035
  {"current_steps": 39200, "total_steps": 40000, "loss": 3.5466, "lr": 4.945515514400978e-08, "epoch": 1.254721208629409, "percentage": 98.0, "elapsed_time": "1 day, 9:19:51", "remaining_time": "0:40:48", "throughput": 497.18, "total_tokens": 59657728}
8036
  {"current_steps": 39200, "total_steps": 40000, "eval_loss": 3.6650795936584473, "epoch": 1.254721208629409, "percentage": 98.0, "elapsed_time": "1 day, 9:28:27", "remaining_time": "0:40:59", "throughput": 495.05, "total_tokens": 59657728}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8034
  {"current_steps": 39195, "total_steps": 40000, "loss": 3.8137, "lr": 5.0074292995316854e-08, "epoch": 1.2545611676589208, "percentage": 97.99, "elapsed_time": "1 day, 9:19:49", "remaining_time": "0:41:04", "throughput": 497.13, "total_tokens": 59650432}
8035
  {"current_steps": 39200, "total_steps": 40000, "loss": 3.5466, "lr": 4.945515514400978e-08, "epoch": 1.254721208629409, "percentage": 98.0, "elapsed_time": "1 day, 9:19:51", "remaining_time": "0:40:48", "throughput": 497.18, "total_tokens": 59657728}
8036
  {"current_steps": 39200, "total_steps": 40000, "eval_loss": 3.6650795936584473, "epoch": 1.254721208629409, "percentage": 98.0, "elapsed_time": "1 day, 9:28:27", "remaining_time": "0:40:59", "throughput": 495.05, "total_tokens": 59657728}
8037
+ {"current_steps": 39205, "total_steps": 40000, "loss": 4.7714, "lr": 4.883986498026571e-08, "epoch": 1.2548812495998976, "percentage": 98.01, "elapsed_time": "1 day, 9:28:32", "remaining_time": "0:40:43", "throughput": 495.1, "total_tokens": 59665104}
8038
+ {"current_steps": 39210, "total_steps": 40000, "loss": 4.1941, "lr": 4.822842259896987e-08, "epoch": 1.255041290570386, "percentage": 98.02, "elapsed_time": "1 day, 9:28:34", "remaining_time": "0:40:28", "throughput": 495.15, "total_tokens": 59672464}
8039
+ {"current_steps": 39215, "total_steps": 40000, "loss": 2.7098, "lr": 4.762082809441626e-08, "epoch": 1.2552013315408745, "percentage": 98.04, "elapsed_time": "1 day, 9:28:36", "remaining_time": "0:40:12", "throughput": 495.2, "total_tokens": 59679904}
8040
+ {"current_steps": 39220, "total_steps": 40000, "loss": 4.1571, "lr": 4.7017081560302156e-08, "epoch": 1.2553613725113628, "percentage": 98.05, "elapsed_time": "1 day, 9:28:38", "remaining_time": "0:39:56", "throughput": 495.25, "total_tokens": 59687104}
8041
+ {"current_steps": 39225, "total_steps": 40000, "loss": 3.086, "lr": 4.6417183089730866e-08, "epoch": 1.2555214134818513, "percentage": 98.06, "elapsed_time": "1 day, 9:28:41", "remaining_time": "0:39:41", "throughput": 495.3, "total_tokens": 59694608}
8042
+ {"current_steps": 39230, "total_steps": 40000, "loss": 3.8701, "lr": 4.5821132775217265e-08, "epoch": 1.2556814544523398, "percentage": 98.08, "elapsed_time": "1 day, 9:28:43", "remaining_time": "0:39:25", "throughput": 495.36, "total_tokens": 59702176}
8043
+ {"current_steps": 39235, "total_steps": 40000, "loss": 3.6428, "lr": 4.5228930708679504e-08, "epoch": 1.2558414954228283, "percentage": 98.09, "elapsed_time": "1 day, 9:28:46", "remaining_time": "0:39:10", "throughput": 495.41, "total_tokens": 59709840}
8044
+ {"current_steps": 39240, "total_steps": 40000, "loss": 3.5713, "lr": 4.464057698144175e-08, "epoch": 1.2560015363933168, "percentage": 98.1, "elapsed_time": "1 day, 9:28:48", "remaining_time": "0:38:54", "throughput": 495.46, "total_tokens": 59716848}
8045
+ {"current_steps": 39245, "total_steps": 40000, "loss": 2.5534, "lr": 4.4056071684236974e-08, "epoch": 1.256161577363805, "percentage": 98.11, "elapsed_time": "1 day, 9:28:50", "remaining_time": "0:38:38", "throughput": 495.51, "total_tokens": 59724704}
8046
+ {"current_steps": 39250, "total_steps": 40000, "loss": 2.7845, "lr": 4.347541490719864e-08, "epoch": 1.2563216183342936, "percentage": 98.12, "elapsed_time": "1 day, 9:28:53", "remaining_time": "0:38:23", "throughput": 495.57, "total_tokens": 59732208}
8047
+ {"current_steps": 39255, "total_steps": 40000, "loss": 3.274, "lr": 4.2898606739877336e-08, "epoch": 1.256481659304782, "percentage": 98.14, "elapsed_time": "1 day, 9:28:55", "remaining_time": "0:38:07", "throughput": 495.62, "total_tokens": 59740128}
8048
+ {"current_steps": 39260, "total_steps": 40000, "loss": 4.1268, "lr": 4.232564727122135e-08, "epoch": 1.2566417002752703, "percentage": 98.15, "elapsed_time": "1 day, 9:28:58", "remaining_time": "0:37:51", "throughput": 495.68, "total_tokens": 59747952}
8049
+ {"current_steps": 39265, "total_steps": 40000, "loss": 3.1858, "lr": 4.1756536589585004e-08, "epoch": 1.2568017412457588, "percentage": 98.16, "elapsed_time": "1 day, 9:29:00", "remaining_time": "0:37:36", "throughput": 495.73, "total_tokens": 59755712}
8050
+ {"current_steps": 39270, "total_steps": 40000, "loss": 3.1961, "lr": 4.119127478273976e-08, "epoch": 1.2569617822162473, "percentage": 98.17, "elapsed_time": "1 day, 9:29:02", "remaining_time": "0:37:20", "throughput": 495.78, "total_tokens": 59763216}
8051
+ {"current_steps": 39275, "total_steps": 40000, "loss": 3.4948, "lr": 4.062986193784923e-08, "epoch": 1.2571218231867358, "percentage": 98.19, "elapsed_time": "1 day, 9:29:04", "remaining_time": "0:37:05", "throughput": 495.84, "total_tokens": 59770736}
8052
+ {"current_steps": 39280, "total_steps": 40000, "loss": 4.1516, "lr": 4.007229814149416e-08, "epoch": 1.2572818641572243, "percentage": 98.2, "elapsed_time": "1 day, 9:29:07", "remaining_time": "0:36:49", "throughput": 495.89, "total_tokens": 59778112}
8053
+ {"current_steps": 39285, "total_steps": 40000, "loss": 3.0186, "lr": 3.951858347965576e-08, "epoch": 1.2574419051277128, "percentage": 98.21, "elapsed_time": "1 day, 9:29:09", "remaining_time": "0:36:34", "throughput": 495.94, "total_tokens": 59785792}
8054
+ {"current_steps": 39290, "total_steps": 40000, "loss": 3.1798, "lr": 3.896871803772684e-08, "epoch": 1.257601946098201, "percentage": 98.22, "elapsed_time": "1 day, 9:29:11", "remaining_time": "0:36:18", "throughput": 495.99, "total_tokens": 59793168}
8055
+ {"current_steps": 39295, "total_steps": 40000, "loss": 3.1775, "lr": 3.842270190050068e-08, "epoch": 1.2577619870686896, "percentage": 98.24, "elapsed_time": "1 day, 9:29:14", "remaining_time": "0:36:02", "throughput": 496.05, "total_tokens": 59801264}
8056
+ {"current_steps": 39300, "total_steps": 40000, "loss": 3.9554, "lr": 3.7880535152179376e-08, "epoch": 1.257922028039178, "percentage": 98.25, "elapsed_time": "1 day, 9:29:16", "remaining_time": "0:35:47", "throughput": 496.11, "total_tokens": 59808944}
8057
+ {"current_steps": 39305, "total_steps": 40000, "loss": 2.7299, "lr": 3.734221787637382e-08, "epoch": 1.2580820690096663, "percentage": 98.26, "elapsed_time": "1 day, 9:29:19", "remaining_time": "0:35:31", "throughput": 496.16, "total_tokens": 59816400}
8058
+ {"current_steps": 39310, "total_steps": 40000, "loss": 4.2397, "lr": 3.680775015609817e-08, "epoch": 1.2582421099801548, "percentage": 98.28, "elapsed_time": "1 day, 9:29:21", "remaining_time": "0:35:16", "throughput": 496.21, "total_tokens": 59824064}
8059
+ {"current_steps": 39315, "total_steps": 40000, "loss": 3.9699, "lr": 3.627713207377537e-08, "epoch": 1.2584021509506433, "percentage": 98.29, "elapsed_time": "1 day, 9:29:24", "remaining_time": "0:35:00", "throughput": 496.27, "total_tokens": 59831984}
8060
+ {"current_steps": 39320, "total_steps": 40000, "loss": 3.8412, "lr": 3.575036371123164e-08, "epoch": 1.2585621919211318, "percentage": 98.3, "elapsed_time": "1 day, 9:29:26", "remaining_time": "0:34:45", "throughput": 496.32, "total_tokens": 59839376}
8061
+ {"current_steps": 39325, "total_steps": 40000, "loss": 2.8608, "lr": 3.5227445149704776e-08, "epoch": 1.2587222328916203, "percentage": 98.31, "elapsed_time": "1 day, 9:29:28", "remaining_time": "0:34:29", "throughput": 496.38, "total_tokens": 59847552}
8062
+ {"current_steps": 39330, "total_steps": 40000, "loss": 3.6937, "lr": 3.470837646983027e-08, "epoch": 1.2588822738621088, "percentage": 98.32, "elapsed_time": "1 day, 9:29:31", "remaining_time": "0:34:13", "throughput": 496.43, "total_tokens": 59854976}
8063
+ {"current_steps": 39335, "total_steps": 40000, "loss": 3.6016, "lr": 3.419315775165799e-08, "epoch": 1.259042314832597, "percentage": 98.34, "elapsed_time": "1 day, 9:29:33", "remaining_time": "0:33:58", "throughput": 496.48, "total_tokens": 59862544}
8064
+ {"current_steps": 39340, "total_steps": 40000, "loss": 2.718, "lr": 3.368178907464103e-08, "epoch": 1.2592023558030856, "percentage": 98.35, "elapsed_time": "1 day, 9:29:35", "remaining_time": "0:33:42", "throughput": 496.53, "total_tokens": 59870064}
8065
+ {"current_steps": 39345, "total_steps": 40000, "loss": 3.6147, "lr": 3.317427051763855e-08, "epoch": 1.259362396773574, "percentage": 98.36, "elapsed_time": "1 day, 9:29:38", "remaining_time": "0:33:27", "throughput": 496.59, "total_tokens": 59877776}
8066
+ {"current_steps": 39350, "total_steps": 40000, "loss": 2.7713, "lr": 3.267060215891571e-08, "epoch": 1.2595224377440624, "percentage": 98.38, "elapsed_time": "1 day, 9:29:40", "remaining_time": "0:33:11", "throughput": 496.64, "total_tokens": 59884976}
8067
+ {"current_steps": 39355, "total_steps": 40000, "loss": 3.1617, "lr": 3.217078407614649e-08, "epoch": 1.2596824787145509, "percentage": 98.39, "elapsed_time": "1 day, 9:29:42", "remaining_time": "0:32:56", "throughput": 496.69, "total_tokens": 59892288}
8068
+ {"current_steps": 39360, "total_steps": 40000, "loss": 6.0121, "lr": 3.1674816346405345e-08, "epoch": 1.2598425196850394, "percentage": 98.4, "elapsed_time": "1 day, 9:29:45", "remaining_time": "0:32:40", "throughput": 496.74, "total_tokens": 59899728}
8069
+ {"current_steps": 39365, "total_steps": 40000, "loss": 3.3705, "lr": 3.11826990461811e-08, "epoch": 1.2600025606555278, "percentage": 98.41, "elapsed_time": "1 day, 9:29:47", "remaining_time": "0:32:25", "throughput": 496.79, "total_tokens": 59907072}
8070
+ {"current_steps": 39370, "total_steps": 40000, "loss": 2.6372, "lr": 3.069443225136304e-08, "epoch": 1.2601626016260163, "percentage": 98.42, "elapsed_time": "1 day, 9:29:49", "remaining_time": "0:32:09", "throughput": 496.85, "total_tokens": 59914448}
8071
+ {"current_steps": 39375, "total_steps": 40000, "loss": 3.402, "lr": 3.021001603724372e-08, "epoch": 1.2603226425965048, "percentage": 98.44, "elapsed_time": "1 day, 9:29:52", "remaining_time": "0:31:54", "throughput": 496.9, "total_tokens": 59922032}
8072
+ {"current_steps": 39380, "total_steps": 40000, "loss": 3.7599, "lr": 2.9729450478532818e-08, "epoch": 1.2604826835669931, "percentage": 98.45, "elapsed_time": "1 day, 9:29:54", "remaining_time": "0:31:38", "throughput": 496.95, "total_tokens": 59929888}
8073
+ {"current_steps": 39385, "total_steps": 40000, "loss": 3.3647, "lr": 2.9252735649337726e-08, "epoch": 1.2606427245374816, "percentage": 98.46, "elapsed_time": "1 day, 9:29:56", "remaining_time": "0:31:23", "throughput": 497.01, "total_tokens": 59937264}
8074
+ {"current_steps": 39390, "total_steps": 40000, "loss": 4.1437, "lr": 2.8779871623171863e-08, "epoch": 1.26080276550797, "percentage": 98.47, "elapsed_time": "1 day, 9:29:59", "remaining_time": "0:31:07", "throughput": 497.06, "total_tokens": 59944960}
8075
+ {"current_steps": 39395, "total_steps": 40000, "loss": 3.4668, "lr": 2.8310858472957448e-08, "epoch": 1.2609628064784584, "percentage": 98.49, "elapsed_time": "1 day, 9:30:01", "remaining_time": "0:30:52", "throughput": 497.11, "total_tokens": 59952560}
8076
+ {"current_steps": 39400, "total_steps": 40000, "loss": 3.0552, "lr": 2.784569627101996e-08, "epoch": 1.2611228474489469, "percentage": 98.5, "elapsed_time": "1 day, 9:30:04", "remaining_time": "0:30:36", "throughput": 497.17, "total_tokens": 59960256}
8077
+ {"current_steps": 39400, "total_steps": 40000, "eval_loss": 3.6650795936584473, "epoch": 1.2611228474489469, "percentage": 98.5, "elapsed_time": "1 day, 9:38:40", "remaining_time": "0:30:44", "throughput": 495.05, "total_tokens": 59960256}