moos124 committed on
Commit
e969c59
·
verified ·
1 Parent(s): 69f76e4

Training in progress, step 2170, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66710723d01236d9c96d2f18926db15f1f34385c49684ee70cd5c1b5e2bccce0
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24244e6a7998c6a6d8fd08a63899b0a99ff0229df4a93b899a4402fb6966618f
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b830f4bd5f10ddaa78c2ccd5bae960791c4296cf33c74d863b325c31d2ac5eec
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c85605483e24b33e7e913cdff9000f6465fbb2ce4da1e8b99361c7b546bad7
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dd591345e5a7f523e887729293c5e6719efb2817a7a7d69905e2063008bb6d9
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10a84bf02e9924e461173db74a7b02d3634cfeec860a3b21ebe7269a82110440
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1952f51067506f4daa1c4e5ae09ad820e7ac7d72f46bcd41a257149deb5cd71
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30abea7aa4d270d5dc2f5fc53ff4372a7dbbff73e412e7ba6dafe81e53a93542
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4608,
6
  "eval_steps": 500,
7
- "global_step": 2160,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2178,6 +2178,16 @@
2178
  "mean_token_accuracy": 0.7566891044378281,
2179
  "num_tokens": 10020194.0,
2180
  "step": 2160
 
 
 
 
 
 
 
 
 
 
2181
  }
2182
  ],
2183
  "logging_steps": 10,
@@ -2197,7 +2207,7 @@
2197
  "attributes": {}
2198
  }
2199
  },
2200
- "total_flos": 4.754142827711693e+16,
2201
  "train_batch_size": 4,
2202
  "trial_name": null,
2203
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4629333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 2170,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2178
  "mean_token_accuracy": 0.7566891044378281,
2179
  "num_tokens": 10020194.0,
2180
  "step": 2160
2181
+ },
2182
+ {
2183
+ "entropy": 0.961787448823452,
2184
+ "epoch": 0.4629333333333333,
2185
+ "grad_norm": 0.2845711410045624,
2186
+ "learning_rate": 8.98965663006169e-05,
2187
+ "loss": 1.0939053535461425,
2188
+ "mean_token_accuracy": 0.7606314912438392,
2189
+ "num_tokens": 10067865.0,
2190
+ "step": 2170
2191
  }
2192
  ],
2193
  "logging_steps": 10,
 
2207
  "attributes": {}
2208
  }
2209
  },
2210
+ "total_flos": 4.775840703913267e+16,
2211
  "train_batch_size": 4,
2212
  "trial_name": null,
2213
  "trial_params": null