rbelanec commited on
Commit
15f1f1e
verified
1 Parent(s): b4e4820

Training in progress, step 800

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a781b1f8e84e189944f48f79ebf50782f956b9741f89356f14e0cecbba70d5
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2dc79f36afe51a0672688935070bbcd3c4028604b0d6b6c367642271f00ecd2
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -121,3 +121,44 @@
121
  {"current_steps": 595, "total_steps": 40000, "loss": 0.6038, "lr": 4.997279906108211e-05, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:15:59", "remaining_time": "17:39:18", "throughput": 3628.15, "total_tokens": 3481952}
122
  {"current_steps": 600, "total_steps": 40000, "loss": 0.7324, "lr": 4.9972339290328155e-05, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:03", "remaining_time": "17:35:00", "throughput": 3642.95, "total_tokens": 3511712}
123
  {"current_steps": 600, "total_steps": 40000, "eval_loss": 0.6449815034866333, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:19:48", "remaining_time": "21:40:53", "throughput": 2954.4, "total_tokens": 3511712}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  {"current_steps": 595, "total_steps": 40000, "loss": 0.6038, "lr": 4.997279906108211e-05, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:15:59", "remaining_time": "17:39:18", "throughput": 3628.15, "total_tokens": 3481952}
122
  {"current_steps": 600, "total_steps": 40000, "loss": 0.7324, "lr": 4.9972339290328155e-05, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:03", "remaining_time": "17:35:00", "throughput": 3642.95, "total_tokens": 3511712}
123
  {"current_steps": 600, "total_steps": 40000, "eval_loss": 0.6449815034866333, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:19:48", "remaining_time": "21:40:53", "throughput": 2954.4, "total_tokens": 3511712}
124
+ {"current_steps": 605, "total_steps": 40000, "loss": 0.6476, "lr": 4.9971875668525646e-05, "epoch": 0.0774598297164074, "percentage": 1.51, "elapsed_time": "0:19:54", "remaining_time": "21:36:40", "throughput": 2963.09, "total_tokens": 3540288}
125
+ {"current_steps": 610, "total_steps": 40000, "loss": 0.7037, "lr": 4.997140819574609e-05, "epoch": 0.07809999359836119, "percentage": 1.52, "elapsed_time": "0:19:59", "remaining_time": "21:30:25", "throughput": 2977.31, "total_tokens": 3569856}
126
+ {"current_steps": 615, "total_steps": 40000, "loss": 0.6651, "lr": 4.997093687206159e-05, "epoch": 0.07874015748031496, "percentage": 1.54, "elapsed_time": "0:20:03", "remaining_time": "21:24:16", "throughput": 2990.84, "total_tokens": 3598720}
127
+ {"current_steps": 620, "total_steps": 40000, "loss": 0.6294, "lr": 4.997046169754482e-05, "epoch": 0.07938032136226875, "percentage": 1.55, "elapsed_time": "0:20:07", "remaining_time": "21:18:13", "throughput": 3004.58, "total_tokens": 3627936}
128
+ {"current_steps": 625, "total_steps": 40000, "loss": 0.6613, "lr": 4.996998267226905e-05, "epoch": 0.08002048524422252, "percentage": 1.56, "elapsed_time": "0:20:11", "remaining_time": "21:12:16", "throughput": 3018.54, "total_tokens": 3657536}
129
+ {"current_steps": 630, "total_steps": 40000, "loss": 0.6997, "lr": 4.996949979630817e-05, "epoch": 0.0806606491261763, "percentage": 1.57, "elapsed_time": "0:20:15", "remaining_time": "21:06:24", "throughput": 3031.63, "total_tokens": 3686176}
130
+ {"current_steps": 635, "total_steps": 40000, "loss": 0.5562, "lr": 4.996901306973663e-05, "epoch": 0.08130081300813008, "percentage": 1.59, "elapsed_time": "0:20:20", "remaining_time": "21:00:45", "throughput": 3045.48, "total_tokens": 3716224}
131
+ {"current_steps": 640, "total_steps": 40000, "loss": 0.6569, "lr": 4.996852249262949e-05, "epoch": 0.08194097689008387, "percentage": 1.6, "elapsed_time": "0:20:24", "remaining_time": "20:55:04", "throughput": 3058.23, "total_tokens": 3744672}
132
+ {"current_steps": 645, "total_steps": 40000, "loss": 0.6224, "lr": 4.996802806506241e-05, "epoch": 0.08258114077203764, "percentage": 1.61, "elapsed_time": "0:20:28", "remaining_time": "20:49:33", "throughput": 3072.41, "total_tokens": 3775232}
133
+ {"current_steps": 650, "total_steps": 40000, "loss": 0.5666, "lr": 4.996752978711164e-05, "epoch": 0.08322130465399143, "percentage": 1.62, "elapsed_time": "0:20:32", "remaining_time": "20:44:03", "throughput": 3085.67, "total_tokens": 3804608}
134
+ {"current_steps": 655, "total_steps": 40000, "loss": 0.5693, "lr": 4.996702765885401e-05, "epoch": 0.0838614685359452, "percentage": 1.64, "elapsed_time": "0:20:37", "remaining_time": "20:38:40", "throughput": 3099.82, "total_tokens": 3835296}
135
+ {"current_steps": 660, "total_steps": 40000, "loss": 0.6508, "lr": 4.9966521680366964e-05, "epoch": 0.08450163241789899, "percentage": 1.65, "elapsed_time": "0:20:41", "remaining_time": "20:33:21", "throughput": 3113.29, "total_tokens": 3865184}
136
+ {"current_steps": 665, "total_steps": 40000, "loss": 0.5583, "lr": 4.9966011851728524e-05, "epoch": 0.08514179629985276, "percentage": 1.66, "elapsed_time": "0:20:45", "remaining_time": "20:28:07", "throughput": 3126.54, "total_tokens": 3894912}
137
+ {"current_steps": 670, "total_steps": 40000, "loss": 0.6213, "lr": 4.996549817301731e-05, "epoch": 0.08578196018180655, "percentage": 1.68, "elapsed_time": "0:20:49", "remaining_time": "20:22:55", "throughput": 3139.13, "total_tokens": 3923840}
138
+ {"current_steps": 675, "total_steps": 40000, "loss": 0.6578, "lr": 4.9964980644312544e-05, "epoch": 0.08642212406376032, "percentage": 1.69, "elapsed_time": "0:20:54", "remaining_time": "20:17:48", "throughput": 3151.82, "total_tokens": 3952992}
139
+ {"current_steps": 680, "total_steps": 40000, "loss": 0.5537, "lr": 4.996445926569403e-05, "epoch": 0.0870622879457141, "percentage": 1.7, "elapsed_time": "0:20:58", "remaining_time": "20:12:47", "throughput": 3164.52, "total_tokens": 3982336}
140
+ {"current_steps": 685, "total_steps": 40000, "loss": 0.6498, "lr": 4.996393403724218e-05, "epoch": 0.08770245182766788, "percentage": 1.71, "elapsed_time": "0:21:02", "remaining_time": "20:07:51", "throughput": 3177.67, "total_tokens": 4012416}
141
+ {"current_steps": 690, "total_steps": 40000, "loss": 0.549, "lr": 4.9963404959037985e-05, "epoch": 0.08834261570962167, "percentage": 1.73, "elapsed_time": "0:21:06", "remaining_time": "20:02:56", "throughput": 3189.59, "total_tokens": 4040864}
142
+ {"current_steps": 695, "total_steps": 40000, "loss": 0.6214, "lr": 4.996287203116303e-05, "epoch": 0.08898277959157544, "percentage": 1.74, "elapsed_time": "0:21:11", "remaining_time": "19:58:06", "throughput": 3202.06, "total_tokens": 4070208}
143
+ {"current_steps": 700, "total_steps": 40000, "loss": 0.6793, "lr": 4.996233525369951e-05, "epoch": 0.08962294347352923, "percentage": 1.75, "elapsed_time": "0:21:15", "remaining_time": "19:53:25", "throughput": 3214.83, "total_tokens": 4100224}
144
+ {"current_steps": 705, "total_steps": 40000, "loss": 0.6036, "lr": 4.99617946267302e-05, "epoch": 0.090263107355483, "percentage": 1.76, "elapsed_time": "0:21:19", "remaining_time": "19:48:42", "throughput": 3226.5, "total_tokens": 4128640}
145
+ {"current_steps": 710, "total_steps": 40000, "loss": 0.658, "lr": 4.996125015033846e-05, "epoch": 0.09090327123743679, "percentage": 1.77, "elapsed_time": "0:21:23", "remaining_time": "19:44:03", "throughput": 3238.7, "total_tokens": 4157888}
146
+ {"current_steps": 715, "total_steps": 40000, "loss": 0.5575, "lr": 4.996070182460827e-05, "epoch": 0.09154343511939056, "percentage": 1.79, "elapsed_time": "0:21:28", "remaining_time": "19:39:29", "throughput": 3250.41, "total_tokens": 4186592}
147
+ {"current_steps": 720, "total_steps": 40000, "loss": 0.5941, "lr": 4.996014964962418e-05, "epoch": 0.09218359900134435, "percentage": 1.8, "elapsed_time": "0:21:32", "remaining_time": "19:35:00", "throughput": 3262.28, "total_tokens": 4215712}
148
+ {"current_steps": 725, "total_steps": 40000, "loss": 0.6708, "lr": 4.9959593625471344e-05, "epoch": 0.09282376288329812, "percentage": 1.81, "elapsed_time": "0:21:36", "remaining_time": "19:30:33", "throughput": 3274.04, "total_tokens": 4244736}
149
+ {"current_steps": 730, "total_steps": 40000, "loss": 0.6292, "lr": 4.995903375223552e-05, "epoch": 0.0934639267652519, "percentage": 1.82, "elapsed_time": "0:21:40", "remaining_time": "19:26:11", "throughput": 3285.9, "total_tokens": 4274048}
150
+ {"current_steps": 735, "total_steps": 40000, "loss": 0.6339, "lr": 4.995847003000302e-05, "epoch": 0.09410409064720568, "percentage": 1.84, "elapsed_time": "0:21:44", "remaining_time": "19:21:51", "throughput": 3296.95, "total_tokens": 4302272}
151
+ {"current_steps": 740, "total_steps": 40000, "loss": 0.5888, "lr": 4.9957902458860804e-05, "epoch": 0.09474425452915947, "percentage": 1.85, "elapsed_time": "0:21:49", "remaining_time": "19:17:35", "throughput": 3308.46, "total_tokens": 4331264}
152
+ {"current_steps": 745, "total_steps": 40000, "loss": 0.5633, "lr": 4.995733103889639e-05, "epoch": 0.09538441841111324, "percentage": 1.86, "elapsed_time": "0:21:53", "remaining_time": "19:13:24", "throughput": 3320.52, "total_tokens": 4361120}
153
+ {"current_steps": 750, "total_steps": 40000, "loss": 0.6356, "lr": 4.99567557701979e-05, "epoch": 0.09602458229306703, "percentage": 1.88, "elapsed_time": "0:21:57", "remaining_time": "19:09:16", "throughput": 3332.69, "total_tokens": 4391264}
154
+ {"current_steps": 755, "total_steps": 40000, "loss": 0.6589, "lr": 4.995617665285403e-05, "epoch": 0.0966647461750208, "percentage": 1.89, "elapsed_time": "0:22:01", "remaining_time": "19:05:11", "throughput": 3344.25, "total_tokens": 4420704}
155
+ {"current_steps": 760, "total_steps": 40000, "loss": 0.6018, "lr": 4.99555936869541e-05, "epoch": 0.09730491005697459, "percentage": 1.9, "elapsed_time": "0:22:06", "remaining_time": "19:01:08", "throughput": 3355.48, "total_tokens": 4449696}
156
+ {"current_steps": 765, "total_steps": 40000, "loss": 0.654, "lr": 4.995500687258803e-05, "epoch": 0.09794507393892836, "percentage": 1.91, "elapsed_time": "0:22:10", "remaining_time": "18:57:08", "throughput": 3366.65, "total_tokens": 4478688}
157
+ {"current_steps": 770, "total_steps": 40000, "loss": 0.6595, "lr": 4.995441620984628e-05, "epoch": 0.09858523782088215, "percentage": 1.93, "elapsed_time": "0:22:14", "remaining_time": "18:53:10", "throughput": 3377.47, "total_tokens": 4507264}
158
+ {"current_steps": 775, "total_steps": 40000, "loss": 0.5934, "lr": 4.995382169881996e-05, "epoch": 0.09922540170283592, "percentage": 1.94, "elapsed_time": "0:22:18", "remaining_time": "18:49:17", "throughput": 3388.28, "total_tokens": 4536000}
159
+ {"current_steps": 780, "total_steps": 40000, "loss": 0.6718, "lr": 4.9953223339600755e-05, "epoch": 0.0998655655847897, "percentage": 1.95, "elapsed_time": "0:22:22", "remaining_time": "18:45:27", "throughput": 3399.91, "total_tokens": 4566016}
160
+ {"current_steps": 785, "total_steps": 40000, "loss": 0.6596, "lr": 4.995262113228091e-05, "epoch": 0.1005057294667435, "percentage": 1.96, "elapsed_time": "0:22:27", "remaining_time": "18:41:42", "throughput": 3411.07, "total_tokens": 4595584}
161
+ {"current_steps": 790, "total_steps": 40000, "loss": 0.5254, "lr": 4.995201507695332e-05, "epoch": 0.10114589334869727, "percentage": 1.98, "elapsed_time": "0:22:31", "remaining_time": "18:38:02", "throughput": 3423.14, "total_tokens": 4626624}
162
+ {"current_steps": 795, "total_steps": 40000, "loss": 0.6161, "lr": 4.995140517371144e-05, "epoch": 0.10178605723065105, "percentage": 1.99, "elapsed_time": "0:22:35", "remaining_time": "18:34:23", "throughput": 3434.53, "total_tokens": 4656704}
163
+ {"current_steps": 800, "total_steps": 40000, "loss": 0.6898, "lr": 4.995079142264932e-05, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:22:40", "remaining_time": "18:30:44", "throughput": 3445.76, "total_tokens": 4686560}
164
+ {"current_steps": 800, "total_steps": 40000, "eval_loss": 0.6095894575119019, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:26:28", "remaining_time": "21:37:03", "throughput": 2950.8, "total_tokens": 4686560}