rbelanec commited on
Commit
a1848eb
·
verified ·
1 Parent(s): 2a345d3

Training in progress, step 39600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e487d2288d2b7d98c2643a7492651e679b57a679e682ad5313286c294dd5af1
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5be56e1537d98ba9fa61639378a6aa6ab0644721408d33d1a90b53fd15b818cf
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -8076,3 +8076,44 @@
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0, "lr": 2.784569627101996e-08, "epoch": 437.77777777777777, "percentage": 98.5, "elapsed_time": "4:28:32", "remaining_time": "0:04:05", "throughput": 685.09, "total_tokens": 11038352}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.39003992080688477, "epoch": 437.77777777777777, "percentage": 98.5, "elapsed_time": "4:28:33", "remaining_time": "0:04:05", "throughput": 685.04, "total_tokens": 11038352}
8078
  {"current_steps": 39405, "total_steps": 40000, "loss": 0.0, "lr": 2.738438508909924e-08, "epoch": 437.8333333333333, "percentage": 98.51, "elapsed_time": "4:28:36", "remaining_time": "0:04:03", "throughput": 684.98, "total_tokens": 11039744}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0, "lr": 2.784569627101996e-08, "epoch": 437.77777777777777, "percentage": 98.5, "elapsed_time": "4:28:32", "remaining_time": "0:04:05", "throughput": 685.09, "total_tokens": 11038352}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.39003992080688477, "epoch": 437.77777777777777, "percentage": 98.5, "elapsed_time": "4:28:33", "remaining_time": "0:04:05", "throughput": 685.04, "total_tokens": 11038352}
8078
  {"current_steps": 39405, "total_steps": 40000, "loss": 0.0, "lr": 2.738438508909924e-08, "epoch": 437.8333333333333, "percentage": 98.51, "elapsed_time": "4:28:36", "remaining_time": "0:04:03", "throughput": 684.98, "total_tokens": 11039744}
8079
+ {"current_steps": 39410, "total_steps": 40000, "loss": 0.0, "lr": 2.692692499833005e-08, "epoch": 437.8888888888889, "percentage": 98.52, "elapsed_time": "4:28:38", "remaining_time": "0:04:01", "throughput": 684.98, "total_tokens": 11041136}
8080
+ {"current_steps": 39415, "total_steps": 40000, "loss": 0.0, "lr": 2.647331606926151e-08, "epoch": 437.94444444444446, "percentage": 98.54, "elapsed_time": "4:28:40", "remaining_time": "0:03:59", "throughput": 684.99, "total_tokens": 11042544}
8081
+ {"current_steps": 39420, "total_steps": 40000, "loss": 0.0, "lr": 2.6023558371843225e-08, "epoch": 438.0, "percentage": 98.55, "elapsed_time": "4:28:42", "remaining_time": "0:03:57", "throughput": 684.99, "total_tokens": 11043936}
8082
+ {"current_steps": 39425, "total_steps": 40000, "loss": 0.0, "lr": 2.557765197543638e-08, "epoch": 438.05555555555554, "percentage": 98.56, "elapsed_time": "4:28:45", "remaining_time": "0:03:55", "throughput": 684.98, "total_tokens": 11045312}
8083
+ {"current_steps": 39430, "total_steps": 40000, "loss": 0.0, "lr": 2.513559694880263e-08, "epoch": 438.1111111111111, "percentage": 98.58, "elapsed_time": "4:28:47", "remaining_time": "0:03:53", "throughput": 684.98, "total_tokens": 11046736}
8084
+ {"current_steps": 39435, "total_steps": 40000, "loss": 0.0, "lr": 2.469739336011523e-08, "epoch": 438.1666666666667, "percentage": 98.59, "elapsed_time": "4:28:48", "remaining_time": "0:03:51", "throughput": 684.99, "total_tokens": 11048112}
8085
+ {"current_steps": 39440, "total_steps": 40000, "loss": 0.0, "lr": 2.4263041276947894e-08, "epoch": 438.22222222222223, "percentage": 98.6, "elapsed_time": "4:28:50", "remaining_time": "0:03:49", "throughput": 684.99, "total_tokens": 11049552}
8086
+ {"current_steps": 39445, "total_steps": 40000, "loss": 0.0, "lr": 2.3832540766283164e-08, "epoch": 438.27777777777777, "percentage": 98.61, "elapsed_time": "4:28:52", "remaining_time": "0:03:46", "throughput": 685.0, "total_tokens": 11050976}
8087
+ {"current_steps": 39450, "total_steps": 40000, "loss": 0.0, "lr": 2.3405891894512366e-08, "epoch": 438.3333333333333, "percentage": 98.62, "elapsed_time": "4:28:54", "remaining_time": "0:03:44", "throughput": 685.0, "total_tokens": 11052400}
8088
+ {"current_steps": 39455, "total_steps": 40000, "loss": 0.0, "lr": 2.29830947274301e-08, "epoch": 438.3888888888889, "percentage": 98.64, "elapsed_time": "4:28:56", "remaining_time": "0:03:42", "throughput": 685.01, "total_tokens": 11053824}
8089
+ {"current_steps": 39460, "total_steps": 40000, "loss": 0.0, "lr": 2.2564149330231432e-08, "epoch": 438.44444444444446, "percentage": 98.65, "elapsed_time": "4:28:58", "remaining_time": "0:03:40", "throughput": 685.01, "total_tokens": 11055168}
8090
+ {"current_steps": 39465, "total_steps": 40000, "loss": 0.0, "lr": 2.2149055767528572e-08, "epoch": 438.5, "percentage": 98.66, "elapsed_time": "4:29:00", "remaining_time": "0:03:38", "throughput": 685.02, "total_tokens": 11056592}
8091
+ {"current_steps": 39470, "total_steps": 40000, "loss": 0.0, "lr": 2.1737814103334197e-08, "epoch": 438.55555555555554, "percentage": 98.67, "elapsed_time": "4:29:02", "remaining_time": "0:03:36", "throughput": 685.02, "total_tokens": 11058000}
8092
+ {"current_steps": 39475, "total_steps": 40000, "loss": 0.0, "lr": 2.1330424401064253e-08, "epoch": 438.6111111111111, "percentage": 98.69, "elapsed_time": "4:29:04", "remaining_time": "0:03:34", "throughput": 685.03, "total_tokens": 11059392}
8093
+ {"current_steps": 39480, "total_steps": 40000, "loss": 0.0, "lr": 2.092688672354348e-08, "epoch": 438.6666666666667, "percentage": 98.7, "elapsed_time": "4:29:06", "remaining_time": "0:03:32", "throughput": 685.03, "total_tokens": 11060784}
8094
+ {"current_steps": 39485, "total_steps": 40000, "loss": 0.0, "lr": 2.0527201133005435e-08, "epoch": 438.72222222222223, "percentage": 98.71, "elapsed_time": "4:29:08", "remaining_time": "0:03:30", "throughput": 685.03, "total_tokens": 11062128}
8095
+ {"current_steps": 39490, "total_steps": 40000, "loss": 0.0, "lr": 2.0131367691084148e-08, "epoch": 438.77777777777777, "percentage": 98.72, "elapsed_time": "4:29:10", "remaining_time": "0:03:28", "throughput": 685.04, "total_tokens": 11063552}
8096
+ {"current_steps": 39495, "total_steps": 40000, "loss": 0.0, "lr": 1.9739386458819675e-08, "epoch": 438.8333333333333, "percentage": 98.74, "elapsed_time": "4:29:12", "remaining_time": "0:03:26", "throughput": 685.04, "total_tokens": 11064992}
8097
+ {"current_steps": 39500, "total_steps": 40000, "loss": 0.0, "lr": 1.9351257496666442e-08, "epoch": 438.8888888888889, "percentage": 98.75, "elapsed_time": "4:29:14", "remaining_time": "0:03:24", "throughput": 685.05, "total_tokens": 11066368}
8098
+ {"current_steps": 39505, "total_steps": 40000, "loss": 0.0, "lr": 1.896698086447657e-08, "epoch": 438.94444444444446, "percentage": 98.76, "elapsed_time": "4:29:16", "remaining_time": "0:03:22", "throughput": 685.05, "total_tokens": 11067792}
8099
+ {"current_steps": 39510, "total_steps": 40000, "loss": 0.0, "lr": 1.8586556621505436e-08, "epoch": 439.0, "percentage": 98.78, "elapsed_time": "4:29:18", "remaining_time": "0:03:20", "throughput": 685.05, "total_tokens": 11069168}
8100
+ {"current_steps": 39515, "total_steps": 40000, "loss": 0.0, "lr": 1.820998482642833e-08, "epoch": 439.05555555555554, "percentage": 98.79, "elapsed_time": "4:29:20", "remaining_time": "0:03:18", "throughput": 685.04, "total_tokens": 11070560}
8101
+ {"current_steps": 39520, "total_steps": 40000, "loss": 0.0, "lr": 1.7837265537309912e-08, "epoch": 439.1111111111111, "percentage": 98.8, "elapsed_time": "4:29:22", "remaining_time": "0:03:16", "throughput": 685.05, "total_tokens": 11072000}
8102
+ {"current_steps": 39525, "total_steps": 40000, "loss": 0.0, "lr": 1.7468398811629206e-08, "epoch": 439.1666666666667, "percentage": 98.81, "elapsed_time": "4:29:24", "remaining_time": "0:03:14", "throughput": 685.05, "total_tokens": 11073360}
8103
+ {"current_steps": 39530, "total_steps": 40000, "loss": 0.0, "lr": 1.710338470627404e-08, "epoch": 439.22222222222223, "percentage": 98.83, "elapsed_time": "4:29:26", "remaining_time": "0:03:12", "throughput": 685.05, "total_tokens": 11074768}
8104
+ {"current_steps": 39535, "total_steps": 40000, "loss": 0.0, "lr": 1.6742223277529945e-08, "epoch": 439.27777777777777, "percentage": 98.84, "elapsed_time": "4:29:28", "remaining_time": "0:03:10", "throughput": 685.06, "total_tokens": 11076176}
8105
+ {"current_steps": 39540, "total_steps": 40000, "loss": 0.0, "lr": 1.6384914581094036e-08, "epoch": 439.3333333333333, "percentage": 98.85, "elapsed_time": "4:29:30", "remaining_time": "0:03:08", "throughput": 685.06, "total_tokens": 11077536}
8106
+ {"current_steps": 39545, "total_steps": 40000, "loss": 0.0, "lr": 1.6031458672069455e-08, "epoch": 439.3888888888889, "percentage": 98.86, "elapsed_time": "4:29:32", "remaining_time": "0:03:06", "throughput": 685.07, "total_tokens": 11078960}
8107
+ {"current_steps": 39550, "total_steps": 40000, "loss": 0.0, "lr": 1.5681855604962602e-08, "epoch": 439.44444444444446, "percentage": 98.88, "elapsed_time": "4:29:34", "remaining_time": "0:03:04", "throughput": 685.07, "total_tokens": 11080304}
8108
+ {"current_steps": 39555, "total_steps": 40000, "loss": 0.0, "lr": 1.5336105433683135e-08, "epoch": 439.5, "percentage": 98.89, "elapsed_time": "4:29:35", "remaining_time": "0:03:01", "throughput": 685.07, "total_tokens": 11081744}
8109
+ {"current_steps": 39560, "total_steps": 40000, "loss": 0.0, "lr": 1.499420821155506e-08, "epoch": 439.55555555555554, "percentage": 98.9, "elapsed_time": "4:29:37", "remaining_time": "0:02:59", "throughput": 685.08, "total_tokens": 11083120}
8110
+ {"current_steps": 39565, "total_steps": 40000, "loss": 0.0, "lr": 1.4656163991302874e-08, "epoch": 439.6111111111111, "percentage": 98.91, "elapsed_time": "4:29:39", "remaining_time": "0:02:57", "throughput": 685.08, "total_tokens": 11084512}
8111
+ {"current_steps": 39570, "total_steps": 40000, "loss": 0.0, "lr": 1.4321972825051544e-08, "epoch": 439.6666666666667, "percentage": 98.92, "elapsed_time": "4:29:41", "remaining_time": "0:02:55", "throughput": 685.09, "total_tokens": 11085904}
8112
+ {"current_steps": 39575, "total_steps": 40000, "loss": 0.0, "lr": 1.3991634764345951e-08, "epoch": 439.72222222222223, "percentage": 98.94, "elapsed_time": "4:29:43", "remaining_time": "0:02:53", "throughput": 685.09, "total_tokens": 11087344}
8113
+ {"current_steps": 39580, "total_steps": 40000, "loss": 0.0, "lr": 1.3665149860120352e-08, "epoch": 439.77777777777777, "percentage": 98.95, "elapsed_time": "4:29:45", "remaining_time": "0:02:51", "throughput": 685.1, "total_tokens": 11088752}
8114
+ {"current_steps": 39585, "total_steps": 40000, "loss": 0.0, "lr": 1.3342518162728912e-08, "epoch": 439.8333333333333, "percentage": 98.96, "elapsed_time": "4:29:47", "remaining_time": "0:02:49", "throughput": 685.1, "total_tokens": 11090128}
8115
+ {"current_steps": 39590, "total_steps": 40000, "loss": 0.0, "lr": 1.30237397219235e-08, "epoch": 439.8888888888889, "percentage": 98.98, "elapsed_time": "4:29:49", "remaining_time": "0:02:47", "throughput": 685.11, "total_tokens": 11091552}
8116
+ {"current_steps": 39595, "total_steps": 40000, "loss": 0.0, "lr": 1.2708814586862016e-08, "epoch": 439.94444444444446, "percentage": 98.99, "elapsed_time": "4:29:51", "remaining_time": "0:02:45", "throughput": 685.11, "total_tokens": 11092912}
8117
+ {"current_steps": 39600, "total_steps": 40000, "loss": 0.0, "lr": 1.2397742806111168e-08, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "4:29:53", "remaining_time": "0:02:43", "throughput": 685.11, "total_tokens": 11094352}
8118
+ {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.3569253385066986, "epoch": 440.0, "percentage": 99.0, "elapsed_time": "4:29:54", "remaining_time": "0:02:43", "throughput": 685.05, "total_tokens": 11094352}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0, "lr": 1.209052442764369e-08, "epoch": 440.05555555555554, "percentage": 99.01, "elapsed_time": "4:29:58", "remaining_time": "0:02:41", "throughput": 684.99, "total_tokens": 11095760}