rbelanec commited on
Commit
c3c7b90
·
verified ·
1 Parent(s): e84eb0c

Training in progress, step 35000

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +40 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7398eb8eadc1164503ea24ffc8a5bdc89cd2b280a259d7029f9d0708f9cb3657
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ac5472866fbaf1b165ceb5ffd91223304b731c556fd0f369c22f792202769d
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -7134,3 +7134,43 @@
7134
  {"current_steps": 34800, "total_steps": 40000, "eval_loss": 0.2789691686630249, "epoch": 1.1138851545995774, "percentage": 87.0, "elapsed_time": "1 day, 9:47:23", "remaining_time": "5:02:56", "throughput": 435.37, "total_tokens": 52960256}
7135
  {"current_steps": 34805, "total_steps": 40000, "loss": 0.1906, "lr": 2.0530162979471385e-06, "epoch": 1.1140451955700659, "percentage": 87.01, "elapsed_time": "1 day, 9:47:28", "remaining_time": "5:02:37", "throughput": 435.42, "total_tokens": 52967824}
7136
  {"current_steps": 34810, "total_steps": 40000, "loss": 0.1122, "lr": 2.0491219078834667e-06, "epoch": 1.1142052365405544, "percentage": 87.02, "elapsed_time": "1 day, 9:47:31", "remaining_time": "5:02:17", "throughput": 435.47, "total_tokens": 52975728}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7134
  {"current_steps": 34800, "total_steps": 40000, "eval_loss": 0.2789691686630249, "epoch": 1.1138851545995774, "percentage": 87.0, "elapsed_time": "1 day, 9:47:23", "remaining_time": "5:02:56", "throughput": 435.37, "total_tokens": 52960256}
7135
  {"current_steps": 34805, "total_steps": 40000, "loss": 0.1906, "lr": 2.0530162979471385e-06, "epoch": 1.1140451955700659, "percentage": 87.01, "elapsed_time": "1 day, 9:47:28", "remaining_time": "5:02:37", "throughput": 435.42, "total_tokens": 52967824}
7136
  {"current_steps": 34810, "total_steps": 40000, "loss": 0.1122, "lr": 2.0491219078834667e-06, "epoch": 1.1142052365405544, "percentage": 87.02, "elapsed_time": "1 day, 9:47:31", "remaining_time": "5:02:17", "throughput": 435.47, "total_tokens": 52975728}
7137
+ {"current_steps": 34815, "total_steps": 40000, "loss": 0.1018, "lr": 2.045231057133612e-06, "epoch": 1.1143652775110429, "percentage": 87.04, "elapsed_time": "1 day, 9:47:34", "remaining_time": "5:01:57", "throughput": 435.52, "total_tokens": 52983408}
7138
+ {"current_steps": 34820, "total_steps": 40000, "loss": 0.2685, "lr": 2.0413437462975944e-06, "epoch": 1.1145253184815314, "percentage": 87.05, "elapsed_time": "1 day, 9:47:37", "remaining_time": "5:01:38", "throughput": 435.58, "total_tokens": 52991824}
7139
+ {"current_steps": 34825, "total_steps": 40000, "loss": 0.2207, "lr": 2.0374599759748843e-06, "epoch": 1.1146853594520196, "percentage": 87.06, "elapsed_time": "1 day, 9:47:40", "remaining_time": "5:01:18", "throughput": 435.63, "total_tokens": 52999120}
7140
+ {"current_steps": 34830, "total_steps": 40000, "loss": 0.1965, "lr": 2.033579746764419e-06, "epoch": 1.1148454004225081, "percentage": 87.08, "elapsed_time": "1 day, 9:47:43", "remaining_time": "5:00:59", "throughput": 435.69, "total_tokens": 53006880}
7141
+ {"current_steps": 34835, "total_steps": 40000, "loss": 0.2068, "lr": 2.029703059264565e-06, "epoch": 1.1150054413929966, "percentage": 87.09, "elapsed_time": "1 day, 9:47:46", "remaining_time": "5:00:39", "throughput": 435.74, "total_tokens": 53014784}
7142
+ {"current_steps": 34840, "total_steps": 40000, "loss": 0.1568, "lr": 2.02582991407316e-06, "epoch": 1.1151654823634851, "percentage": 87.1, "elapsed_time": "1 day, 9:47:49", "remaining_time": "5:00:19", "throughput": 435.79, "total_tokens": 53022304}
7143
+ {"current_steps": 34845, "total_steps": 40000, "loss": 0.3809, "lr": 2.0219603117874992e-06, "epoch": 1.1153255233339734, "percentage": 87.11, "elapsed_time": "1 day, 9:47:52", "remaining_time": "5:00:00", "throughput": 435.84, "total_tokens": 53029888}
7144
+ {"current_steps": 34850, "total_steps": 40000, "loss": 0.1531, "lr": 2.0180942530043156e-06, "epoch": 1.1154855643044619, "percentage": 87.12, "elapsed_time": "1 day, 9:47:55", "remaining_time": "4:59:40", "throughput": 435.9, "total_tokens": 53038160}
7145
+ {"current_steps": 34855, "total_steps": 40000, "loss": 0.1999, "lr": 2.0142317383198107e-06, "epoch": 1.1156456052749504, "percentage": 87.14, "elapsed_time": "1 day, 9:47:58", "remaining_time": "4:59:21", "throughput": 435.95, "total_tokens": 53045840}
7146
+ {"current_steps": 34860, "total_steps": 40000, "loss": 0.2962, "lr": 2.0103727683296243e-06, "epoch": 1.1158056462454389, "percentage": 87.15, "elapsed_time": "1 day, 9:48:01", "remaining_time": "4:59:01", "throughput": 436.01, "total_tokens": 53054032}
7147
+ {"current_steps": 34865, "total_steps": 40000, "loss": 0.1083, "lr": 2.0065173436288636e-06, "epoch": 1.1159656872159274, "percentage": 87.16, "elapsed_time": "1 day, 9:48:03", "remaining_time": "4:58:41", "throughput": 436.06, "total_tokens": 53061552}
7148
+ {"current_steps": 34870, "total_steps": 40000, "loss": 0.1135, "lr": 2.002665464812087e-06, "epoch": 1.1161257281864156, "percentage": 87.17, "elapsed_time": "1 day, 9:48:06", "remaining_time": "4:58:22", "throughput": 436.11, "total_tokens": 53069280}
7149
+ {"current_steps": 34875, "total_steps": 40000, "loss": 0.1573, "lr": 1.998817132473291e-06, "epoch": 1.1162857691569041, "percentage": 87.19, "elapsed_time": "1 day, 9:48:09", "remaining_time": "4:58:02", "throughput": 436.17, "total_tokens": 53077136}
7150
+ {"current_steps": 34880, "total_steps": 40000, "loss": 0.2039, "lr": 1.9949723472059507e-06, "epoch": 1.1164458101273926, "percentage": 87.2, "elapsed_time": "1 day, 9:48:12", "remaining_time": "4:57:43", "throughput": 436.22, "total_tokens": 53084528}
7151
+ {"current_steps": 34885, "total_steps": 40000, "loss": 0.1857, "lr": 1.9911311096029726e-06, "epoch": 1.1166058510978811, "percentage": 87.21, "elapsed_time": "1 day, 9:48:15", "remaining_time": "4:57:23", "throughput": 436.27, "total_tokens": 53092688}
7152
+ {"current_steps": 34890, "total_steps": 40000, "loss": 0.4212, "lr": 1.9872934202567224e-06, "epoch": 1.1167658920683694, "percentage": 87.22, "elapsed_time": "1 day, 9:48:18", "remaining_time": "4:57:03", "throughput": 436.33, "total_tokens": 53100144}
7153
+ {"current_steps": 34895, "total_steps": 40000, "loss": 0.1964, "lr": 1.9834592797590257e-06, "epoch": 1.116925933038858, "percentage": 87.24, "elapsed_time": "1 day, 9:48:21", "remaining_time": "4:56:44", "throughput": 436.38, "total_tokens": 53107664}
7154
+ {"current_steps": 34900, "total_steps": 40000, "loss": 0.132, "lr": 1.979628688701149e-06, "epoch": 1.1170859740093464, "percentage": 87.25, "elapsed_time": "1 day, 9:48:24", "remaining_time": "4:56:24", "throughput": 436.43, "total_tokens": 53115504}
7155
+ {"current_steps": 34905, "total_steps": 40000, "loss": 0.2409, "lr": 1.9758016476738193e-06, "epoch": 1.117246014979835, "percentage": 87.26, "elapsed_time": "1 day, 9:48:26", "remaining_time": "4:56:05", "throughput": 436.48, "total_tokens": 53122560}
7156
+ {"current_steps": 34910, "total_steps": 40000, "loss": 0.1982, "lr": 1.971978157267221e-06, "epoch": 1.1174060559503234, "percentage": 87.28, "elapsed_time": "1 day, 9:48:29", "remaining_time": "4:55:45", "throughput": 436.53, "total_tokens": 53129936}
7157
+ {"current_steps": 34915, "total_steps": 40000, "loss": 0.1743, "lr": 1.968158218070973e-06, "epoch": 1.1175660969208117, "percentage": 87.29, "elapsed_time": "1 day, 9:48:32", "remaining_time": "4:55:26", "throughput": 436.59, "total_tokens": 53137792}
7158
+ {"current_steps": 34920, "total_steps": 40000, "loss": 0.2766, "lr": 1.9643418306741682e-06, "epoch": 1.1177261378913002, "percentage": 87.3, "elapsed_time": "1 day, 9:48:35", "remaining_time": "4:55:06", "throughput": 436.64, "total_tokens": 53145088}
7159
+ {"current_steps": 34925, "total_steps": 40000, "loss": 0.1563, "lr": 1.9605289956653337e-06, "epoch": 1.1178861788617886, "percentage": 87.31, "elapsed_time": "1 day, 9:48:38", "remaining_time": "4:54:47", "throughput": 436.69, "total_tokens": 53153280}
7160
+ {"current_steps": 34930, "total_steps": 40000, "loss": 0.1146, "lr": 1.9567197136324626e-06, "epoch": 1.1180462198322771, "percentage": 87.33, "elapsed_time": "1 day, 9:48:40", "remaining_time": "4:54:27", "throughput": 436.74, "total_tokens": 53160832}
7161
+ {"current_steps": 34935, "total_steps": 40000, "loss": 0.3305, "lr": 1.9529139851629935e-06, "epoch": 1.1182062608027654, "percentage": 87.34, "elapsed_time": "1 day, 9:48:43", "remaining_time": "4:54:07", "throughput": 436.79, "total_tokens": 53168320}
7162
+ {"current_steps": 34940, "total_steps": 40000, "loss": 0.1086, "lr": 1.949111810843812e-06, "epoch": 1.118366301773254, "percentage": 87.35, "elapsed_time": "1 day, 9:48:46", "remaining_time": "4:53:48", "throughput": 436.84, "total_tokens": 53175392}
7163
+ {"current_steps": 34945, "total_steps": 40000, "loss": 0.1682, "lr": 1.9453131912612694e-06, "epoch": 1.1185263427437424, "percentage": 87.36, "elapsed_time": "1 day, 9:48:49", "remaining_time": "4:53:28", "throughput": 436.9, "total_tokens": 53183040}
7164
+ {"current_steps": 34950, "total_steps": 40000, "loss": 0.144, "lr": 1.941518127001149e-06, "epoch": 1.118686383714231, "percentage": 87.38, "elapsed_time": "1 day, 9:48:52", "remaining_time": "4:53:09", "throughput": 436.95, "total_tokens": 53190480}
7165
+ {"current_steps": 34955, "total_steps": 40000, "loss": 0.179, "lr": 1.9377266186487107e-06, "epoch": 1.1188464246847194, "percentage": 87.39, "elapsed_time": "1 day, 9:48:54", "remaining_time": "4:52:49", "throughput": 437.0, "total_tokens": 53197968}
7166
+ {"current_steps": 34960, "total_steps": 40000, "loss": 0.1713, "lr": 1.9339386667886483e-06, "epoch": 1.1190064656552077, "percentage": 87.4, "elapsed_time": "1 day, 9:48:57", "remaining_time": "4:52:30", "throughput": 437.05, "total_tokens": 53205424}
7167
+ {"current_steps": 34965, "total_steps": 40000, "loss": 0.1643, "lr": 1.9301542720051024e-06, "epoch": 1.1191665066256962, "percentage": 87.41, "elapsed_time": "1 day, 9:49:00", "remaining_time": "4:52:10", "throughput": 437.1, "total_tokens": 53212560}
7168
+ {"current_steps": 34970, "total_steps": 40000, "loss": 0.3027, "lr": 1.926373434881684e-06, "epoch": 1.1193265475961847, "percentage": 87.42, "elapsed_time": "1 day, 9:49:03", "remaining_time": "4:51:51", "throughput": 437.15, "total_tokens": 53219968}
7169
+ {"current_steps": 34975, "total_steps": 40000, "loss": 0.1099, "lr": 1.9225961560014468e-06, "epoch": 1.1194865885666732, "percentage": 87.44, "elapsed_time": "1 day, 9:49:06", "remaining_time": "4:51:31", "throughput": 437.2, "total_tokens": 53227744}
7170
+ {"current_steps": 34980, "total_steps": 40000, "loss": 0.2152, "lr": 1.918822435946885e-06, "epoch": 1.1196466295371614, "percentage": 87.45, "elapsed_time": "1 day, 9:49:08", "remaining_time": "4:51:12", "throughput": 437.25, "total_tokens": 53235136}
7171
+ {"current_steps": 34985, "total_steps": 40000, "loss": 0.3035, "lr": 1.915052275299961e-06, "epoch": 1.11980667050765, "percentage": 87.46, "elapsed_time": "1 day, 9:49:11", "remaining_time": "4:50:52", "throughput": 437.31, "total_tokens": 53242880}
7172
+ {"current_steps": 34990, "total_steps": 40000, "loss": 0.2229, "lr": 1.9112856746420854e-06, "epoch": 1.1199667114781384, "percentage": 87.48, "elapsed_time": "1 day, 9:49:14", "remaining_time": "4:50:33", "throughput": 437.36, "total_tokens": 53251088}
7173
+ {"current_steps": 34995, "total_steps": 40000, "loss": 0.1433, "lr": 1.907522634554104e-06, "epoch": 1.120126752448627, "percentage": 87.49, "elapsed_time": "1 day, 9:49:17", "remaining_time": "4:50:13", "throughput": 437.42, "total_tokens": 53258640}
7174
+ {"current_steps": 35000, "total_steps": 40000, "loss": 0.1544, "lr": 1.9037631556163337e-06, "epoch": 1.1202867934191152, "percentage": 87.5, "elapsed_time": "1 day, 9:49:20", "remaining_time": "4:49:54", "throughput": 437.47, "total_tokens": 53265840}
7175
+ {"current_steps": 35000, "total_steps": 40000, "eval_loss": 0.27802976965904236, "epoch": 1.1202867934191152, "percentage": 87.5, "elapsed_time": "1 day, 9:59:02", "remaining_time": "4:51:17", "throughput": 435.38, "total_tokens": 53265840}
7176
+ {"current_steps": 35005, "total_steps": 40000, "loss": 0.1719, "lr": 1.9000072384085272e-06, "epoch": 1.1204468343896037, "percentage": 87.51, "elapsed_time": "1 day, 9:59:07", "remaining_time": "4:50:58", "throughput": 435.43, "total_tokens": 53274336}