mgh6 committed on
Commit
c5ae0d1
·
verified ·
1 Parent(s): 5dcba21

Training in progress, step 14800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f0570fbe1cfb2cc0429421c072380990511e34a42c049d209bffe41383ce964
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf358de268a54776074310a54a7c12deb56462a5265576a1eda5921bba9c5323
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16d8c736d6f95c350e9377664a0da87f05e71a8a7c9d7421b0cdb2eb7d7c320e
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0900ea58dd3831eedc9c64342e3517f37a47cd080a491487aa6e546116ba536d
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67a554a2b1dfd0fb5fde5dac2e3a00d7c706f9f81bb1fac139c3340134e19778
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3385d00673ab26e974b24f32d495806cac9e9d296dd8eda75a158061b999e4f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39a8fd283686b2b43f94f51c5406bd8c5a6961c6a78793c52b8339d3ac0fec03
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cbddae73c69a5a5e030d3ce88a9a12a39562863a4aaa77890660f67f9be7c7e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 29.3954069676613,
5
  "eval_steps": 500,
6
- "global_step": 14700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1268,6 +1268,13 @@
1268
  "learning_rate": 2.0000000000000003e-06,
1269
  "loss": 1.2179,
1270
  "step": 14700
 
 
 
 
 
 
 
1271
  }
1272
  ],
1273
  "logging_steps": 100,
@@ -1287,7 +1294,7 @@
1287
  "attributes": {}
1288
  }
1289
  },
1290
- "total_flos": 2.294275521542252e+19,
1291
  "train_batch_size": 8,
1292
  "trial_name": null,
1293
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.595375722543352,
5
  "eval_steps": 500,
6
+ "global_step": 14800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1268
  "learning_rate": 2.0000000000000003e-06,
1269
  "loss": 1.2179,
1270
  "step": 14700
1271
+ },
1272
+ {
1273
+ "epoch": 29.595375722543352,
1274
+ "grad_norm": 0.23738054931163788,
1275
+ "learning_rate": 1.3333333333333334e-06,
1276
+ "loss": 1.2181,
1277
+ "step": 14800
1278
  }
1279
  ],
1280
  "logging_steps": 100,
 
1294
  "attributes": {}
1295
  }
1296
  },
1297
+ "total_flos": 2.309883018661069e+19,
1298
  "train_batch_size": 8,
1299
  "trial_name": null,
1300
  "trial_params": null