FormlessAI committed on
Commit
2012da4
·
verified ·
1 Parent(s): 5850a19

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59ef861020fc2c545d37b93fe3da9a5df44bc38bc8294f53e4761c47fe0b54ec
3
  size 46708280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcd1f99ca4639d5b7aeddd12e28d8ae4f66d17cb473b2d54aaeb23e2af3a90c
3
  size 46708280
last-checkpoint/global_step1220/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ad49aa0764581eb18dba5361c652635c87dd910c3ec584db7af66e0ea850eb
3
+ size 35203941
last-checkpoint/global_step1220/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df07287976a4ae0b3a6f366185192fae937f1a132311fc534bdd405e77802fd
3
+ size 35203941
last-checkpoint/global_step1220/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed4b735fcb85fd83e7ed250f2192cc3b2e74a86ff1254c584c90e1170755aa9f
3
+ size 35204005
last-checkpoint/global_step1220/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58543f4049c28a886ff21739d7917eb63cf2045ca1a0804d41ac22d0186f0d67
3
+ size 35204005
last-checkpoint/global_step1220/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37c86006de61d0be3b78586059e4d98ea657b269c132c722369e6e4bc65670f
3
+ size 46865049
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1200
 
1
+ global_step1220
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d2bd7dfa69f36b5cf850cd9410f86cc2181cec385a8f9e5311babb0fe0865fe
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814c0560f0064c72cb05d6fdabce12f4ea60678ef99ae76b35beb0a5b61a746f
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91730c940535f449a5703b1d97f92415999c49e1bcf4e4f2f480a357a7f839ce
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d55f786f89c825e7eccff5f5ad6f275ae339974e5fc7ca3adc2610c4ddc7dc29
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ba288defe7ae7069401e52dcaf43410bef5f897eca8da4d501e9e15160c2b41
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1050cb4078b8fb421e9d7cd11460b4f8b3150009a89652c0ce835de2f415292
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:386df291e7445294847744ac5bc975d02e8cbe3d748292dcfaae40c896523259
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a9ac1e1c4df9ca87b88219341350f97eb24cac74de6cbc9e039d0e0d106d3c
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01830d6a6e4b363a7484b09ff196f1e3fb9b43729e4f01d3432c57d9d582ee8b
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c32e39d9113c7bbf215e86b059c76588bc6462dcc095622f3886bf1b72f7af
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.0456267595291138,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.17083674413638467,
6
  "eval_steps": 20,
7
- "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2168,6 +2168,42 @@
2168
  "eval_samples_per_second": 512.638,
2169
  "eval_steps_per_second": 16.021,
2170
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2171
  }
2172
  ],
2173
  "logging_steps": 5,
@@ -2196,7 +2232,7 @@
2196
  "attributes": {}
2197
  }
2198
  },
2199
- "total_flos": 5.878891379026821e+17,
2200
  "train_batch_size": 8,
2201
  "trial_name": null,
2202
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.0451515913009644,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.17368402320532442,
6
  "eval_steps": 20,
7
+ "global_step": 1220,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2168
  "eval_samples_per_second": 512.638,
2169
  "eval_steps_per_second": 16.021,
2170
  "step": 1200
2171
+ },
2172
+ {
2173
+ "epoch": 0.1715485639036196,
2174
+ "grad_norm": 1.4823371171951294,
2175
+ "learning_rate": 2.8734766443620955e-05,
2176
+ "loss": 1.0662,
2177
+ "step": 1205
2178
+ },
2179
+ {
2180
+ "epoch": 0.17226038367085453,
2181
+ "grad_norm": 1.2662029266357422,
2182
+ "learning_rate": 2.8733780986581105e-05,
2183
+ "loss": 1.0515,
2184
+ "step": 1210
2185
+ },
2186
+ {
2187
+ "epoch": 0.17297220343808947,
2188
+ "grad_norm": 1.5725047588348389,
2189
+ "learning_rate": 2.8732791017215324e-05,
2190
+ "loss": 1.0703,
2191
+ "step": 1215
2192
+ },
2193
+ {
2194
+ "epoch": 0.17368402320532442,
2195
+ "grad_norm": 1.4703493118286133,
2196
+ "learning_rate": 2.8731796535835716e-05,
2197
+ "loss": 1.0047,
2198
+ "step": 1220
2199
+ },
2200
+ {
2201
+ "epoch": 0.17368402320532442,
2202
+ "eval_loss": 1.0451515913009644,
2203
+ "eval_runtime": 194.8815,
2204
+ "eval_samples_per_second": 512.614,
2205
+ "eval_steps_per_second": 16.02,
2206
+ "step": 1220
2207
  }
2208
  ],
2209
  "logging_steps": 5,
 
2232
  "attributes": {}
2233
  }
2234
  },
2235
+ "total_flos": 5.974615545851412e+17,
2236
  "train_batch_size": 8,
2237
  "trial_name": null,
2238
  "trial_params": null