flytech committed on
Commit
af310c9
·
1 Parent(s): 84540e2

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c47212017af1d1890422725962f2f60e0630141c69c87c0f307cb0f6c9fd9a1
3
  size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8641639f36d2017cd3be1c77d3de39c47ab3545ed6cdf0ef30eafe1db1dca62e
3
  size 67143296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8fdb34b5032791db249965fe2cb89cd69e01769c1856d0a422fea22e2d6b103
3
  size 33920095
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e354b2b9a397b20dbba46c4d4bd8056e9c164df5588f0377d09afe9a838825
3
  size 33920095
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:036fe95b6a14168d369a32ce869914bf19a1cf115c749866d43c0dbc23f4fc3c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec45735acf0d21a6a13a33353b33cadb4eb3f9c802c936af55a3f5542ea95b3
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5e88c363c3a1a500cd49caa98c7b1302e3233ffa1415f85f0e1a36caffee951
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1352cee7a141eadc0dec5a54fb0352c24d3de0192e8f627a38660bb21a3c14e2
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.4390243902439024,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -59,13 +59,26 @@
59
  "eval_samples_per_second": 2.038,
60
  "eval_steps_per_second": 0.266,
61
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 100,
65
  "max_steps": 1312,
66
  "num_train_epochs": 8,
67
  "save_steps": 100,
68
- "total_flos": 3.322448287314739e+16,
69
  "trial_name": null,
70
  "trial_params": null
71
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.048780487804878,
5
  "eval_steps": 100,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
59
  "eval_samples_per_second": 2.038,
60
  "eval_steps_per_second": 0.266,
61
  "step": 400
62
+ },
63
+ {
64
+ "epoch": 3.05,
65
+ "learning_rate": 0.0002,
66
+ "loss": 0.0777,
67
+ "step": 500
68
+ },
69
+ {
70
+ "epoch": 3.05,
71
+ "eval_runtime": 33.8767,
72
+ "eval_samples_per_second": 2.037,
73
+ "eval_steps_per_second": 0.266,
74
+ "step": 500
75
  }
76
  ],
77
  "logging_steps": 100,
78
  "max_steps": 1312,
79
  "num_train_epochs": 8,
80
  "save_steps": 100,
81
+ "total_flos": 4.151500035902669e+16,
82
  "trial_name": null,
83
  "trial_params": null
84
  }