FredericFan committed on
Commit
4735090
·
verified ·
1 Parent(s): 356727e

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38f7ad03559d709a4e2e50b069ded790243b605bd7f371ff573649b04f3b9ec1
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9704e41640d7704f052e32113e34a80ebd155dba2fd8f3a818c35af9ef8e5e5
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92b31881fd08d1c5e92f4d85e7ad4bea7caf87612bcfd05226d0426ef46f64b4
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38547c985ce9b90055e73bc70569507cc2022f06756bda43feaaa7134440ed4a
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8e3657fd3e577ff4e755452808dc3c4520d43cd58c493adfa9663f21feb734
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f2249892101c67b7f09df7f3b33fbce8ad4fc7b712e0895251ba03419a8b657
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc96f5c8ec054c4dc9f1608ed0c88e89518d2f17c416476f4efe7bf3b829bb03
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dd16cd3f7a9b47079af7541224a232c825207a9e0cc8410dcba6e13de89ef34
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
- "epoch": 1.08,
5
  "eval_steps": 500,
6
- "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2113,6 +2113,84 @@
2113
  "eval_samples_per_second": 22.715,
2114
  "eval_steps_per_second": 5.679,
2115
  "step": 13500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2116
  }
2117
  ],
2118
  "logging_steps": 50,
@@ -2132,7 +2210,7 @@
2132
  "attributes": {}
2133
  }
2134
  },
2135
- "total_flos": 3.288372609024e+16,
2136
  "train_batch_size": 4,
2137
  "trial_name": null,
2138
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
+ "epoch": 1.12,
5
  "eval_steps": 500,
6
+ "global_step": 14000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2113
  "eval_samples_per_second": 22.715,
2114
  "eval_steps_per_second": 5.679,
2115
  "step": 13500
2116
+ },
2117
+ {
2118
+ "epoch": 1.084,
2119
+ "grad_norm": 0.1615404486656189,
2120
+ "learning_rate": 1.3743600000000002e-05,
2121
+ "loss": 0.0572,
2122
+ "step": 13550
2123
+ },
2124
+ {
2125
+ "epoch": 1.088,
2126
+ "grad_norm": 0.1500886082649231,
2127
+ "learning_rate": 1.36836e-05,
2128
+ "loss": 0.0557,
2129
+ "step": 13600
2130
+ },
2131
+ {
2132
+ "epoch": 1.092,
2133
+ "grad_norm": 0.08288303017616272,
2134
+ "learning_rate": 1.3623600000000001e-05,
2135
+ "loss": 0.0567,
2136
+ "step": 13650
2137
+ },
2138
+ {
2139
+ "epoch": 1.096,
2140
+ "grad_norm": 0.12978018820285797,
2141
+ "learning_rate": 1.3563600000000002e-05,
2142
+ "loss": 0.051,
2143
+ "step": 13700
2144
+ },
2145
+ {
2146
+ "epoch": 1.1,
2147
+ "grad_norm": 0.17383359372615814,
2148
+ "learning_rate": 1.35036e-05,
2149
+ "loss": 0.0527,
2150
+ "step": 13750
2151
+ },
2152
+ {
2153
+ "epoch": 1.104,
2154
+ "grad_norm": 0.21195685863494873,
2155
+ "learning_rate": 1.3443600000000001e-05,
2156
+ "loss": 0.0526,
2157
+ "step": 13800
2158
+ },
2159
+ {
2160
+ "epoch": 1.108,
2161
+ "grad_norm": 0.15831385552883148,
2162
+ "learning_rate": 1.33836e-05,
2163
+ "loss": 0.0595,
2164
+ "step": 13850
2165
+ },
2166
+ {
2167
+ "epoch": 1.112,
2168
+ "grad_norm": 0.23136693239212036,
2169
+ "learning_rate": 1.33236e-05,
2170
+ "loss": 0.0523,
2171
+ "step": 13900
2172
+ },
2173
+ {
2174
+ "epoch": 1.116,
2175
+ "grad_norm": 0.12520194053649902,
2176
+ "learning_rate": 1.3263600000000001e-05,
2177
+ "loss": 0.0603,
2178
+ "step": 13950
2179
+ },
2180
+ {
2181
+ "epoch": 1.12,
2182
+ "grad_norm": 0.10329103469848633,
2183
+ "learning_rate": 1.32036e-05,
2184
+ "loss": 0.0494,
2185
+ "step": 14000
2186
+ },
2187
+ {
2188
+ "epoch": 1.12,
2189
+ "eval_loss": 0.083070769906044,
2190
+ "eval_runtime": 88.0459,
2191
+ "eval_samples_per_second": 22.715,
2192
+ "eval_steps_per_second": 5.679,
2193
+ "step": 14000
2194
  }
2195
  ],
2196
  "logging_steps": 50,
 
2210
  "attributes": {}
2211
  }
2212
  },
2213
+ "total_flos": 3.410164187136e+16,
2214
  "train_batch_size": 4,
2215
  "trial_name": null,
2216
  "trial_params": null