FormlessAI committed on
Commit
9ecde44
·
verified ·
1 Parent(s): 1bd95a3

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bcd1f99ca4639d5b7aeddd12e28d8ae4f66d17cb473b2d54aaeb23e2af3a90c
3
  size 46708280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f399f79386b62d14991a13933de2c4d9515d8ad5b112e7f3b6c47f0400762c7e
3
  size 46708280
last-checkpoint/global_step1240/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6dcad10b269bb77b5ea94fd6c121340af81332b2a594c1e5218577d4be9704d
3
+ size 35203941
last-checkpoint/global_step1240/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9060929a93e253502a267887987b1ff6dcf875ceea2962f3c6cda3d2abf20f4
3
+ size 35203941
last-checkpoint/global_step1240/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1a10b585fda800359703e02767a0929b9397ae6d34b7429d37373270343093
3
+ size 35204005
last-checkpoint/global_step1240/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:860bf12f4a41d11fcb7637d0759c5846d56b9f20a6b808b09480b4b5930bebed
3
+ size 35204005
last-checkpoint/global_step1240/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee322f972ee5ce691ce5c817cc6d9fd0024a789c69dee9ee84a6481e29f0501
3
+ size 46865049
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1220
 
1
+ global_step1240
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:814c0560f0064c72cb05d6fdabce12f4ea60678ef99ae76b35beb0a5b61a746f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b1568a3be6471fe5343d9ba70ecf971161f005dabab70609bbe0efc0a1154c
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d55f786f89c825e7eccff5f5ad6f275ae339974e5fc7ca3adc2610c4ddc7dc29
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ff2138cabf47b1e1f7e9f9e14437006377ad11fddd68d4eb807fb09fa9946c
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1050cb4078b8fb421e9d7cd11460b4f8b3150009a89652c0ce835de2f415292
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5dfc325f45bcb9e48691a30e44cd2d275c2b9efee589e712277429b85c7627a
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05a9ac1e1c4df9ca87b88219341350f97eb24cac74de6cbc9e039d0e0d106d3c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9004e7973fd05119b3de581247eeb96db6c413660040fe253c572682c10b1e4b
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c32e39d9113c7bbf215e86b059c76588bc6462dcc095622f3886bf1b72f7af
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718518f1ad8a78addabd073f6934994405f973a0d391d954551404409b75e08c
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.0451515913009644,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.17368402320532442,
6
  "eval_steps": 20,
7
- "global_step": 1220,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2204,6 +2204,42 @@
2204
  "eval_samples_per_second": 512.614,
2205
  "eval_steps_per_second": 16.02,
2206
  "step": 1220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2207
  }
2208
  ],
2209
  "logging_steps": 5,
@@ -2232,7 +2268,7 @@
2232
  "attributes": {}
2233
  }
2234
  },
2235
- "total_flos": 5.974615545851412e+17,
2236
  "train_batch_size": 8,
2237
  "trial_name": null,
2238
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.0437681674957275,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.17653130227426417,
6
  "eval_steps": 20,
7
+ "global_step": 1240,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2204
  "eval_samples_per_second": 512.614,
2205
  "eval_steps_per_second": 16.02,
2206
  "step": 1220
2207
+ },
2208
+ {
2209
+ "epoch": 0.17439584297255936,
2210
+ "grad_norm": 1.5431355237960815,
2211
+ "learning_rate": 2.87307975427558e-05,
2212
+ "loss": 1.0436,
2213
+ "step": 1225
2214
+ },
2215
+ {
2216
+ "epoch": 0.17510766273979428,
2217
+ "grad_norm": 1.57207453250885,
2218
+ "learning_rate": 2.8729794038290515e-05,
2219
+ "loss": 1.0629,
2220
+ "step": 1230
2221
+ },
2222
+ {
2223
+ "epoch": 0.17581948250702922,
2224
+ "grad_norm": 1.5140032768249512,
2225
+ "learning_rate": 2.8728786022756228e-05,
2226
+ "loss": 1.0282,
2227
+ "step": 1235
2228
+ },
2229
+ {
2230
+ "epoch": 0.17653130227426417,
2231
+ "grad_norm": 1.4375582933425903,
2232
+ "learning_rate": 2.8727773496470726e-05,
2233
+ "loss": 1.0326,
2234
+ "step": 1240
2235
+ },
2236
+ {
2237
+ "epoch": 0.17653130227426417,
2238
+ "eval_loss": 1.0437681674957275,
2239
+ "eval_runtime": 194.486,
2240
+ "eval_samples_per_second": 513.657,
2241
+ "eval_steps_per_second": 16.053,
2242
+ "step": 1240
2243
  }
2244
  ],
2245
  "logging_steps": 5,
 
2268
  "attributes": {}
2269
  }
2270
  },
2271
+ "total_flos": 6.070040067571712e+17,
2272
  "train_batch_size": 8,
2273
  "trial_name": null,
2274
  "trial_params": null