mifeng09 commited on
Commit
628e688
·
verified ·
1 Parent(s): 1cd1b7a

Training in progress, step 1011, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1660eed46ba6f0d3fa1ec0731090d018b236a179f4718f8ff7102f222b325f96
3
  size 1321024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41bdb0793d89736e61aa90347d62039b1cc5b91d002ff8baba99be9a418e29d
3
  size 1321024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c32d00e76b7d2bb3304546cfe4632a05f4a37dad22e346b3452ae05d7cb5ecca
3
  size 2687115
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602b1835241dd5c7abd50b47b76431b7081683679692152e8315862e2c995483
3
  size 2687115
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c96f1bff79da150254573441bd748e86215cf8b6a0ff921f8f2e20b39f409151
3
  size 14455
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:279402b83f987e0900d353b3d8f08d84cf30415c38cb898df2f810aaabbad630
3
  size 14455
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1da8be2dfd2f4a410756ef5e4eb0e514dd1783dcb1fde6c88b2ea7e89de617f2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa023108973b8b5d0e85230f52d73593ff1c13cfe94cfa4694449750a3f0f6c6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1000,
3
  "best_metric": 3.7591054439544678,
4
  "best_model_checkpoint": "/kaggle/working/drive/MyDrive/llama_lora_finetuned/checkpoint-1000",
5
- "epoch": 2.9673590504451037,
6
  "eval_steps": 50,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7168,6 +7168,83 @@
7168
  "eval_samples_per_second": 1.679,
7169
  "eval_steps_per_second": 0.213,
7170
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7171
  }
7172
  ],
7173
  "logging_steps": 1,
@@ -7182,12 +7259,12 @@
7182
  "should_evaluate": false,
7183
  "should_log": false,
7184
  "should_save": true,
7185
- "should_training_stop": false
7186
  },
7187
  "attributes": {}
7188
  }
7189
  },
7190
- "total_flos": 584680067334144.0,
7191
  "train_batch_size": 4,
7192
  "trial_name": null,
7193
  "trial_params": null
 
2
  "best_global_step": 1000,
3
  "best_metric": 3.7591054439544678,
4
  "best_model_checkpoint": "/kaggle/working/drive/MyDrive/llama_lora_finetuned/checkpoint-1000",
5
+ "epoch": 3.0,
6
  "eval_steps": 50,
7
+ "global_step": 1011,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7168
  "eval_samples_per_second": 1.679,
7169
  "eval_steps_per_second": 0.213,
7170
  "step": 1000
7171
+ },
7172
+ {
7173
+ "epoch": 2.970326409495549,
7174
+ "grad_norm": 0.8667409420013428,
7175
+ "learning_rate": 3.604874476173525e-08,
7176
+ "loss": 3.3783,
7177
+ "step": 1001
7178
+ },
7179
+ {
7180
+ "epoch": 2.973293768545994,
7181
+ "grad_norm": 1.0062025785446167,
7182
+ "learning_rate": 2.9792972446479605e-08,
7183
+ "loss": 3.5963,
7184
+ "step": 1002
7185
+ },
7186
+ {
7187
+ "epoch": 2.9762611275964392,
7188
+ "grad_norm": 0.7768653035163879,
7189
+ "learning_rate": 2.4132763089035338e-08,
7190
+ "loss": 3.8658,
7191
+ "step": 1003
7192
+ },
7193
+ {
7194
+ "epoch": 2.979228486646884,
7195
+ "grad_norm": 1.0506861209869385,
7196
+ "learning_rate": 1.9068184149834134e-08,
7197
+ "loss": 3.8484,
7198
+ "step": 1004
7199
+ },
7200
+ {
7201
+ "epoch": 2.9821958456973294,
7202
+ "grad_norm": 0.7125198245048523,
7203
+ "learning_rate": 1.4599295990352924e-08,
7204
+ "loss": 3.7196,
7205
+ "step": 1005
7206
+ },
7207
+ {
7208
+ "epoch": 2.9851632047477743,
7209
+ "grad_norm": 0.7124053835868835,
7210
+ "learning_rate": 1.0726151872408884e-08,
7211
+ "loss": 4.2165,
7212
+ "step": 1006
7213
+ },
7214
+ {
7215
+ "epoch": 2.9881305637982196,
7216
+ "grad_norm": 1.1544585227966309,
7217
+ "learning_rate": 7.448797957526621e-09,
7218
+ "loss": 3.6743,
7219
+ "step": 1007
7220
+ },
7221
+ {
7222
+ "epoch": 2.991097922848665,
7223
+ "grad_norm": 0.9369633197784424,
7224
+ "learning_rate": 4.767273306405251e-09,
7225
+ "loss": 3.6329,
7226
+ "step": 1008
7227
+ },
7228
+ {
7229
+ "epoch": 2.99406528189911,
7230
+ "grad_norm": 0.8136985301971436,
7231
+ "learning_rate": 2.6816098784188117e-09,
7232
+ "loss": 3.6183,
7233
+ "step": 1009
7234
+ },
7235
+ {
7236
+ "epoch": 2.9970326409495547,
7237
+ "grad_norm": 0.9446889162063599,
7238
+ "learning_rate": 1.1918325312498812e-09,
7239
+ "loss": 4.1876,
7240
+ "step": 1010
7241
+ },
7242
+ {
7243
+ "epoch": 3.0,
7244
+ "grad_norm": 1.2427432537078857,
7245
+ "learning_rate": 2.979590206064753e-10,
7246
+ "loss": 3.8464,
7247
+ "step": 1011
7248
  }
7249
  ],
7250
  "logging_steps": 1,
 
7259
  "should_evaluate": false,
7260
  "should_log": false,
7261
  "should_save": true,
7262
+ "should_training_stop": true
7263
  },
7264
  "attributes": {}
7265
  }
7266
  },
7267
+ "total_flos": 590731906547712.0,
7268
  "train_batch_size": 4,
7269
  "trial_name": null,
7270
  "trial_params": null