rbelanec commited on
Commit
c473c96
verified
1 Parent(s): 7aa5f29

Training in progress, step 5610

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +53 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cb4de17b937d32f869b92f23e63a4b34e3f7bc27a47a41dc154d7b015647f0c
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cb1fb2b1b6bb56fb3dfac23a78de440197341482e30f21db4d301ea1619fcb
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -1087,3 +1087,56 @@
1087
  {"current_steps": 5340, "total_steps": 5610, "loss": 0.0005, "lr": 3.5457516904947587e-07, "epoch": 9.518716577540108, "percentage": 95.19, "elapsed_time": "0:23:24", "remaining_time": "0:01:11", "throughput": 2359.52, "total_tokens": 3313672}
1088
  {"current_steps": 5345, "total_steps": 5610, "loss": 0.0007, "lr": 3.416415229512443e-07, "epoch": 9.527629233511586, "percentage": 95.28, "elapsed_time": "0:23:25", "remaining_time": "0:01:09", "throughput": 2359.97, "total_tokens": 3317224}
1089
  {"current_steps": 5350, "total_steps": 5610, "loss": 0.0004, "lr": 3.2894654410041417e-07, "epoch": 9.536541889483066, "percentage": 95.37, "elapsed_time": "0:23:26", "remaining_time": "0:01:08", "throughput": 2359.97, "total_tokens": 3319848}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  {"current_steps": 5340, "total_steps": 5610, "loss": 0.0005, "lr": 3.5457516904947587e-07, "epoch": 9.518716577540108, "percentage": 95.19, "elapsed_time": "0:23:24", "remaining_time": "0:01:11", "throughput": 2359.52, "total_tokens": 3313672}
1088
  {"current_steps": 5345, "total_steps": 5610, "loss": 0.0007, "lr": 3.416415229512443e-07, "epoch": 9.527629233511586, "percentage": 95.28, "elapsed_time": "0:23:25", "remaining_time": "0:01:09", "throughput": 2359.97, "total_tokens": 3317224}
1089
  {"current_steps": 5350, "total_steps": 5610, "loss": 0.0004, "lr": 3.2894654410041417e-07, "epoch": 9.536541889483066, "percentage": 95.37, "elapsed_time": "0:23:26", "remaining_time": "0:01:08", "throughput": 2359.97, "total_tokens": 3319848}
1090
+ {"current_steps": 5355, "total_steps": 5610, "loss": 0.0004, "lr": 3.1649035537117123e-07, "epoch": 9.545454545454545, "percentage": 95.45, "elapsed_time": "0:23:27", "remaining_time": "0:01:07", "throughput": 2360.11, "total_tokens": 3322664}
1091
+ {"current_steps": 5360, "total_steps": 5610, "loss": 0.0003, "lr": 3.042730773264557e-07, "epoch": 9.554367201426025, "percentage": 95.54, "elapsed_time": "0:23:29", "remaining_time": "0:01:05", "throughput": 2360.45, "total_tokens": 3325928}
1092
+ {"current_steps": 5365, "total_steps": 5610, "loss": 0.0003, "lr": 2.9229482821680197e-07, "epoch": 9.563279857397504, "percentage": 95.63, "elapsed_time": "0:23:30", "remaining_time": "0:01:04", "throughput": 2360.51, "total_tokens": 3328680}
1093
+ {"current_steps": 5370, "total_steps": 5610, "loss": 0.0005, "lr": 2.8055572397919784e-07, "epoch": 9.572192513368984, "percentage": 95.72, "elapsed_time": "0:23:31", "remaining_time": "0:01:03", "throughput": 2360.84, "total_tokens": 3331976}
1094
+ {"current_steps": 5375, "total_steps": 5610, "loss": 0.0002, "lr": 2.690558782359576e-07, "epoch": 9.581105169340464, "percentage": 95.81, "elapsed_time": "0:23:32", "remaining_time": "0:01:01", "throughput": 2361.0, "total_tokens": 3334888}
1095
+ {"current_steps": 5380, "total_steps": 5610, "loss": 0.0003, "lr": 2.5779540229361745e-07, "epoch": 9.590017825311943, "percentage": 95.9, "elapsed_time": "0:23:33", "remaining_time": "0:01:00", "throughput": 2361.23, "total_tokens": 3337960}
1096
+ {"current_steps": 5385, "total_steps": 5610, "loss": 0.0007, "lr": 2.467744051418641e-07, "epoch": 9.598930481283423, "percentage": 95.99, "elapsed_time": "0:23:34", "remaining_time": "0:00:59", "throughput": 2361.43, "total_tokens": 3340936}
1097
+ {"current_steps": 5390, "total_steps": 5610, "loss": 0.0006, "lr": 2.3599299345248292e-07, "epoch": 9.607843137254902, "percentage": 96.08, "elapsed_time": "0:23:35", "remaining_time": "0:00:57", "throughput": 2361.56, "total_tokens": 3343784}
1098
+ {"current_steps": 5395, "total_steps": 5610, "loss": 0.0004, "lr": 2.2545127157831413e-07, "epoch": 9.616755793226382, "percentage": 96.17, "elapsed_time": "0:23:37", "remaining_time": "0:00:56", "throughput": 2361.9, "total_tokens": 3347016}
1099
+ {"current_steps": 5400, "total_steps": 5610, "loss": 0.0003, "lr": 2.1514934155226208e-07, "epoch": 9.62566844919786, "percentage": 96.26, "elapsed_time": "0:23:38", "remaining_time": "0:00:55", "throughput": 2362.02, "total_tokens": 3349800}
1100
+ {"current_steps": 5405, "total_steps": 5610, "loss": 0.0005, "lr": 2.0508730308627933e-07, "epoch": 9.63458110516934, "percentage": 96.35, "elapsed_time": "0:23:39", "remaining_time": "0:00:53", "throughput": 2362.59, "total_tokens": 3353640}
1101
+ {"current_steps": 5410, "total_steps": 5610, "loss": 0.0008, "lr": 1.9526525357043136e-07, "epoch": 9.643493761140821, "percentage": 96.43, "elapsed_time": "0:23:40", "remaining_time": "0:00:52", "throughput": 2362.9, "total_tokens": 3356904}
1102
+ {"current_steps": 5415, "total_steps": 5610, "loss": 0.0002, "lr": 1.8568328807193337e-07, "epoch": 9.6524064171123, "percentage": 96.52, "elapsed_time": "0:23:41", "remaining_time": "0:00:51", "throughput": 2363.24, "total_tokens": 3360232}
1103
+ {"current_steps": 5420, "total_steps": 5610, "loss": 0.0007, "lr": 1.7634149933423993e-07, "epoch": 9.66131907308378, "percentage": 96.61, "elapsed_time": "0:23:42", "remaining_time": "0:00:49", "throughput": 2363.28, "total_tokens": 3362824}
1104
+ {"current_steps": 5425, "total_steps": 5610, "loss": 0.0006, "lr": 1.6723997777614574e-07, "epoch": 9.670231729055258, "percentage": 96.7, "elapsed_time": "0:23:44", "remaining_time": "0:00:48", "throughput": 2363.62, "total_tokens": 3366152}
1105
+ {"current_steps": 5430, "total_steps": 5610, "loss": 0.0002, "lr": 1.5837881149090294e-07, "epoch": 9.679144385026738, "percentage": 96.79, "elapsed_time": "0:23:45", "remaining_time": "0:00:47", "throughput": 2363.84, "total_tokens": 3369192}
1106
+ {"current_steps": 5435, "total_steps": 5610, "loss": 0.0041, "lr": 1.497580862453829e-07, "epoch": 9.688057040998217, "percentage": 96.88, "elapsed_time": "0:23:46", "remaining_time": "0:00:45", "throughput": 2364.29, "total_tokens": 3372776}
1107
+ {"current_steps": 5440, "total_steps": 5610, "loss": 0.0003, "lr": 1.4137788547923246e-07, "epoch": 9.696969696969697, "percentage": 96.97, "elapsed_time": "0:23:47", "remaining_time": "0:00:44", "throughput": 2364.71, "total_tokens": 3376232}
1108
+ {"current_steps": 5445, "total_steps": 5610, "loss": 0.0005, "lr": 1.3323829030407465e-07, "epoch": 9.705882352941176, "percentage": 97.06, "elapsed_time": "0:23:49", "remaining_time": "0:00:43", "throughput": 2365.21, "total_tokens": 3379912}
1109
+ {"current_steps": 5450, "total_steps": 5610, "loss": 0.0005, "lr": 1.2533937950272023e-07, "epoch": 9.714795008912656, "percentage": 97.15, "elapsed_time": "0:23:50", "remaining_time": "0:00:41", "throughput": 2365.43, "total_tokens": 3382824}
1110
+ {"current_steps": 5455, "total_steps": 5610, "loss": 0.0003, "lr": 1.176812295283991e-07, "epoch": 9.723707664884136, "percentage": 97.24, "elapsed_time": "0:23:51", "remaining_time": "0:00:40", "throughput": 2365.52, "total_tokens": 3385640}
1111
+ {"current_steps": 5460, "total_steps": 5610, "loss": 0.0003, "lr": 1.1026391450404128e-07, "epoch": 9.732620320855615, "percentage": 97.33, "elapsed_time": "0:23:52", "remaining_time": "0:00:39", "throughput": 2366.18, "total_tokens": 3389672}
1112
+ {"current_steps": 5465, "total_steps": 5610, "loss": 0.0136, "lr": 1.0308750622153307e-07, "epoch": 9.741532976827095, "percentage": 97.42, "elapsed_time": "0:23:53", "remaining_time": "0:00:38", "throughput": 2366.55, "total_tokens": 3393096}
1113
+ {"current_steps": 5470, "total_steps": 5610, "loss": 0.0002, "lr": 9.615207414103434e-08, "epoch": 9.750445632798574, "percentage": 97.5, "elapsed_time": "0:23:54", "remaining_time": "0:00:36", "throughput": 2366.75, "total_tokens": 3396136}
1114
+ {"current_steps": 5475, "total_steps": 5610, "loss": 0.0809, "lr": 8.945768539031785e-08, "epoch": 9.759358288770054, "percentage": 97.59, "elapsed_time": "0:23:56", "remaining_time": "0:00:35", "throughput": 2367.01, "total_tokens": 3399304}
1115
+ {"current_steps": 5480, "total_steps": 5610, "loss": 0.0007, "lr": 8.30044047640921e-08, "epoch": 9.768270944741532, "percentage": 97.68, "elapsed_time": "0:23:57", "remaining_time": "0:00:34", "throughput": 2367.15, "total_tokens": 3402216}
1116
+ {"current_steps": 5485, "total_steps": 5610, "loss": 0.0003, "lr": 7.679229472340176e-08, "epoch": 9.777183600713013, "percentage": 97.77, "elapsed_time": "0:23:58", "remaining_time": "0:00:32", "throughput": 2367.26, "total_tokens": 3405096}
1117
+ {"current_steps": 5490, "total_steps": 5610, "loss": 0.0246, "lr": 7.082141539500597e-08, "epoch": 9.786096256684491, "percentage": 97.86, "elapsed_time": "0:23:59", "remaining_time": "0:00:31", "throughput": 2367.37, "total_tokens": 3407912}
1118
+ {"current_steps": 5495, "total_steps": 5610, "loss": 0.0004, "lr": 6.509182457080376e-08, "epoch": 9.795008912655971, "percentage": 97.95, "elapsed_time": "0:24:00", "remaining_time": "0:00:30", "throughput": 2367.52, "total_tokens": 3410856}
1119
+ {"current_steps": 5500, "total_steps": 5610, "loss": 0.0003, "lr": 5.9603577707267875e-08, "epoch": 9.803921568627452, "percentage": 98.04, "elapsed_time": "0:24:01", "remaining_time": "0:00:28", "throughput": 2367.75, "total_tokens": 3413928}
1120
+ {"current_steps": 5505, "total_steps": 5610, "loss": 0.0007, "lr": 5.435672792491742e-08, "epoch": 9.81283422459893, "percentage": 98.13, "elapsed_time": "0:24:03", "remaining_time": "0:00:27", "throughput": 2368.15, "total_tokens": 3417416}
1121
+ {"current_steps": 5510, "total_steps": 5610, "loss": 0.0002, "lr": 4.935132600780157e-08, "epoch": 9.82174688057041, "percentage": 98.22, "elapsed_time": "0:24:04", "remaining_time": "0:00:26", "throughput": 2368.21, "total_tokens": 3420136}
1122
+ {"current_steps": 5515, "total_steps": 5610, "loss": 0.0003, "lr": 4.4587420402997235e-08, "epoch": 9.830659536541889, "percentage": 98.31, "elapsed_time": "0:24:05", "remaining_time": "0:00:24", "throughput": 2368.47, "total_tokens": 3423272}
1123
+ {"current_steps": 5520, "total_steps": 5610, "loss": 0.0006, "lr": 4.006505722015386e-08, "epoch": 9.83957219251337, "percentage": 98.4, "elapsed_time": "0:24:06", "remaining_time": "0:00:23", "throughput": 2368.76, "total_tokens": 3426472}
1124
+ {"current_steps": 5525, "total_steps": 5610, "loss": 0.0005, "lr": 3.578428023103819e-08, "epoch": 9.848484848484848, "percentage": 98.48, "elapsed_time": "0:24:07", "remaining_time": "0:00:22", "throughput": 2369.17, "total_tokens": 3429992}
1125
+ {"current_steps": 5530, "total_steps": 5610, "loss": 0.0003, "lr": 3.1745130869123566e-08, "epoch": 9.857397504456328, "percentage": 98.57, "elapsed_time": "0:24:08", "remaining_time": "0:00:20", "throughput": 2369.12, "total_tokens": 3432456}
1126
+ {"current_steps": 5535, "total_steps": 5610, "loss": 0.0003, "lr": 2.794764822916518e-08, "epoch": 9.866310160427808, "percentage": 98.66, "elapsed_time": "0:24:09", "remaining_time": "0:00:19", "throughput": 2369.08, "total_tokens": 3434888}
1127
+ {"current_steps": 5540, "total_steps": 5610, "loss": 0.0003, "lr": 2.4391869066844874e-08, "epoch": 9.875222816399287, "percentage": 98.75, "elapsed_time": "0:24:11", "remaining_time": "0:00:18", "throughput": 2369.27, "total_tokens": 3437832}
1128
+ {"current_steps": 5545, "total_steps": 5610, "loss": 0.0002, "lr": 2.1077827798404726e-08, "epoch": 9.884135472370767, "percentage": 98.84, "elapsed_time": "0:24:12", "remaining_time": "0:00:17", "throughput": 2369.5, "total_tokens": 3440872}
1129
+ {"current_steps": 5550, "total_steps": 5610, "loss": 0.001, "lr": 1.8005556500313993e-08, "epoch": 9.893048128342246, "percentage": 98.93, "elapsed_time": "0:24:13", "remaining_time": "0:00:15", "throughput": 2369.64, "total_tokens": 3443784}
1130
+ {"current_steps": 5555, "total_steps": 5610, "loss": 0.0003, "lr": 1.51750849089638e-08, "epoch": 9.901960784313726, "percentage": 99.02, "elapsed_time": "0:24:14", "remaining_time": "0:00:14", "throughput": 2370.17, "total_tokens": 3447592}
1131
+ {"current_steps": 5560, "total_steps": 5610, "loss": 0.0011, "lr": 1.2586440420372936e-08, "epoch": 9.910873440285204, "percentage": 99.11, "elapsed_time": "0:24:15", "remaining_time": "0:00:13", "throughput": 2370.56, "total_tokens": 3451048}
1132
+ {"current_steps": 5565, "total_steps": 5610, "loss": 0.0004, "lr": 1.023964808992417e-08, "epoch": 9.919786096256685, "percentage": 99.2, "elapsed_time": "0:24:16", "remaining_time": "0:00:11", "throughput": 2370.69, "total_tokens": 3453928}
1133
+ {"current_steps": 5570, "total_steps": 5610, "loss": 0.0004, "lr": 8.134730632125554e-09, "epoch": 9.928698752228165, "percentage": 99.29, "elapsed_time": "0:24:18", "remaining_time": "0:00:10", "throughput": 2370.9, "total_tokens": 3456968}
1134
+ {"current_steps": 5575, "total_steps": 5610, "loss": 0.0004, "lr": 6.271708420385603e-09, "epoch": 9.937611408199643, "percentage": 99.38, "elapsed_time": "0:24:19", "remaining_time": "0:00:09", "throughput": 2371.35, "total_tokens": 3460616}
1135
+ {"current_steps": 5580, "total_steps": 5610, "loss": 0.0003, "lr": 4.650599486827334e-09, "epoch": 9.946524064171124, "percentage": 99.47, "elapsed_time": "0:24:20", "remaining_time": "0:00:07", "throughput": 2371.55, "total_tokens": 3463592}
1136
+ {"current_steps": 5585, "total_steps": 5610, "loss": 0.0007, "lr": 3.2714195220912013e-09, "epoch": 9.955436720142602, "percentage": 99.55, "elapsed_time": "0:24:21", "remaining_time": "0:00:06", "throughput": 2371.89, "total_tokens": 3466888}
1137
+ {"current_steps": 5590, "total_steps": 5610, "loss": 0.0006, "lr": 2.134181875204644e-09, "epoch": 9.964349376114082, "percentage": 99.64, "elapsed_time": "0:24:22", "remaining_time": "0:00:05", "throughput": 2372.27, "total_tokens": 3470408}
1138
+ {"current_steps": 5595, "total_steps": 5610, "loss": 0.0005, "lr": 1.2388975534460834e-09, "epoch": 9.973262032085561, "percentage": 99.73, "elapsed_time": "0:24:24", "remaining_time": "0:00:03", "throughput": 2372.53, "total_tokens": 3473608}
1139
+ {"current_steps": 5600, "total_steps": 5610, "loss": 0.0003, "lr": 5.855752222366783e-10, "epoch": 9.982174688057041, "percentage": 99.82, "elapsed_time": "0:24:25", "remaining_time": "0:00:02", "throughput": 2372.68, "total_tokens": 3476616}
1140
+ {"current_steps": 5605, "total_steps": 5610, "loss": 0.0005, "lr": 1.7422120505705686e-10, "epoch": 9.99108734402852, "percentage": 99.91, "elapsed_time": "0:24:26", "remaining_time": "0:00:01", "throughput": 2372.88, "total_tokens": 3479624}
1141
+ {"current_steps": 5610, "total_steps": 5610, "loss": 0.0002, "lr": 4.839483383478616e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:24:27", "remaining_time": "0:00:00", "throughput": 2372.54, "total_tokens": 3481336}
1142
+ {"current_steps": 5610, "total_steps": 5610, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:24:28", "remaining_time": "0:00:00", "throughput": 2370.22, "total_tokens": 3481336}