flytech committed on
Commit
53e1c27
·
1 Parent(s): 2a42fd0

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9186e1525e298ae957df9c7d816f8d853f4313465b69c77d89c1ff420ddbe60
3
  size 250422888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b18132b9672f7c85c253a46f48484aeaa498a25f0d48f0ad3a9fb28b398f31
3
  size 250422888
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08f5ea334dea64e776bbba9913a0c86bd471a0c408e54aacad6b42479b41b8f9
3
  size 126034975
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2773c5818788dd83effd883c5f81bb74fa3b3709e640723c52bf35a87050372f
3
  size 126034975
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76bd897477088d23bf49e684a4e51ad54eb04c1caf470ddcaf04b9dd491c8a7b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375510201f043d2e54302cf978e45789bb83136750b5df9dcd69ccff26ecc958
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcd2587f362188ac4728d4fa6edf8d2b0b6d72db365d49f7b847d4d79e3da09f
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e0a1736a46fd1627af3c246e44261aaac909256abbd413b5ee5c968f6b2d8e
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.878048780487805,
5
  "eval_steps": 200,
6
- "global_step": 1600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -111,13 +111,26 @@
111
  "eval_samples_per_second": 3.672,
112
  "eval_steps_per_second": 0.239,
113
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
  ],
116
  "logging_steps": 200,
117
  "max_steps": 2624,
118
  "num_train_epochs": 8,
119
  "save_steps": 200,
120
- "total_flos": 5.156125752164352e+17,
121
  "trial_name": null,
122
  "trial_params": null
123
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.487804878048781,
5
  "eval_steps": 200,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
111
  "eval_samples_per_second": 3.672,
112
  "eval_steps_per_second": 0.239,
113
  "step": 1600
114
+ },
115
+ {
116
+ "epoch": 5.49,
117
+ "learning_rate": 0.0002,
118
+ "loss": 0.0348,
119
+ "step": 1800
120
+ },
121
+ {
122
+ "epoch": 5.49,
123
+ "eval_runtime": 75.1286,
124
+ "eval_samples_per_second": 3.674,
125
+ "eval_steps_per_second": 0.24,
126
+ "step": 1800
127
  }
128
  ],
129
  "logging_steps": 200,
130
  "max_steps": 2624,
131
  "num_train_epochs": 8,
132
  "save_steps": 200,
133
+ "total_flos": 5.79922850217984e+17,
134
  "trial_name": null,
135
  "trial_params": null
136
  }