flytech commited on
Commit
f678724
·
1 Parent(s): 01316b8

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:625c24ad2702a76c58af2535cf9c27d049e84fb7e01989449430541b3b0fbfce
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c871ff3f5191ba9d84d95d49526b48b3a64015b90fcfb3b97308a69bd4baab8a
3
  size 67143296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9478eb7b3f3027bb4070c6389e3eba25e44251abcc324317c1e9a1079f1fc04b
3
  size 33920095
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c9b0ae12eb6ec6a798b81a654cd5efccd601425ca8bb5851db76fc22e39b783
3
  size 33920095
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:518e379eb95b8761d691fa2f3663a7e6b2114c816fd3f33c63e566311da0ce7c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6556ac00826ca68da6956f62bae0bc37868dae3c7f5f2b790637f8c3ff47184f
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c1c649bd14a49756b729525a4732fa77be1d6c78a716ad6162c210414e48318
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8baac9452f9a11760308042a12ceaa04a49da1de0252fb4cb010c250b16b5f44
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.487804878048781,
5
  "eval_steps": 100,
6
- "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -124,13 +124,26 @@
124
  "eval_samples_per_second": 2.037,
125
  "eval_steps_per_second": 0.266,
126
  "step": 900
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  }
128
  ],
129
  "logging_steps": 100,
130
  "max_steps": 1312,
131
  "num_train_epochs": 8,
132
  "save_steps": 100,
133
- "total_flos": 7.473948323217408e+16,
134
  "trial_name": null,
135
  "trial_params": null
136
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.097560975609756,
5
  "eval_steps": 100,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
124
  "eval_samples_per_second": 2.037,
125
  "eval_steps_per_second": 0.266,
126
  "step": 900
127
+ },
128
+ {
129
+ "epoch": 6.1,
130
+ "learning_rate": 0.0002,
131
+ "loss": 0.0345,
132
+ "step": 1000
133
+ },
134
+ {
135
+ "epoch": 6.1,
136
+ "eval_runtime": 33.8717,
137
+ "eval_samples_per_second": 2.037,
138
+ "eval_steps_per_second": 0.266,
139
+ "step": 1000
140
  }
141
  ],
142
  "logging_steps": 100,
143
  "max_steps": 1312,
144
  "num_train_epochs": 8,
145
  "save_steps": 100,
146
+ "total_flos": 8.303000071805338e+16,
147
  "trial_name": null,
148
  "trial_params": null
149
  }