mgh6 committed on
Commit
e3c5333
·
verified ·
1 Parent(s): 7ccb6c8

Training in progress, step 1900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b23bbf8e2596c69ade90bdc024987f09023799ee0a67f340bd5762b13fb39b2b
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24064aa8683230a7b3a039fd81e1b05e0ff78db2e5bf584fe89728e9df23ffe5
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b5275c4032b1e3f929b02daec7945e8c0af2d4f48ab0c6ad547f385702bcb0e
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e7683c83debe0cae274d7783463a965c02eab380f24d7b8882509a86c4f8309
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59038f6fd31a878a0088aa82a3fa047e39cd383b08a1e2f45a6b7fe38fcc6c68
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9386b4e0c43c2c20517e468db5aea4f7e38dafdd46988a182c3761eebc734402
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fbcd595bd682a5655776734b02f2217b8256b553a1f0e90feb357642ed64497
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1880aef90fa44f8fcb9d240241d2b9fce349526552d8adb6f96784c29ef5265
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.5994375878768943,
5
  "eval_steps": 500,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,13 @@
157
  "learning_rate": 8.800000000000001e-05,
158
  "loss": 1.2961,
159
  "step": 1800
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 100,
@@ -176,7 +183,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 2.8093220107761746e+18,
180
  "train_batch_size": 8,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.799406342758944,
5
  "eval_steps": 500,
6
+ "global_step": 1900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "learning_rate": 8.800000000000001e-05,
158
  "loss": 1.2961,
159
  "step": 1800
160
+ },
161
+ {
162
+ "epoch": 3.799406342758944,
163
+ "grad_norm": 0.13838326930999756,
164
+ "learning_rate": 8.733333333333333e-05,
165
+ "loss": 1.2946,
166
+ "step": 1900
167
  }
168
  ],
169
  "logging_steps": 100,
 
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 2.965396981964341e+18,
187
  "train_batch_size": 8,
188
  "trial_name": null,
189
  "trial_params": null