rovdetection commited on
Commit
f7ac1b8
·
verified ·
1 Parent(s): fe8d628

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01f02a6552ebf557c663261cde7e237513b533f9cc549a0a4e441821d246faa3
3
  size 4523108832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a751a165bf17614987ee30caba843bf951957ba5761cc1ce2081c7374c53074
3
  size 4523108832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9735805862fcaf445077a15d872c7a340cd7e8ea695d46738281425d99ff7bc1
3
  size 2911851147
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7480e0708a0bb83f0630f8cb4be5db168c5200d77f56a0ec1bd890be73a54559
3
  size 2911851147
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f77569c2e850b04af982cc8c1389f1430851448915c593b69e5da36ce05b71d7
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b61ed96b9f34f057dffa0bad8ef6959040ba1cfe848017506ba05f40fdaea76
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61361c878721548392539ed308adea82ec21fc99e9c9e2512a2e560c5477b77c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 1.025797724723816,
4
- "best_model_checkpoint": "./sft-out/checkpoint-500",
5
- "epoch": 0.8840267418089397,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -86,6 +86,84 @@
86
  "eval_samples_per_second": 15.867,
87
  "eval_steps_per_second": 1.999,
88
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 50,
@@ -100,12 +178,12 @@
100
  "should_evaluate": false,
101
  "should_log": false,
102
  "should_save": true,
103
- "should_training_stop": false
104
  },
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 1.9429307822186496e+16,
109
  "train_batch_size": 1,
110
  "trial_name": null,
111
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.9919272661209106,
4
+ "best_model_checkpoint": "./sft-out/checkpoint-1000",
5
+ "epoch": 1.7673352118901597,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
86
  "eval_samples_per_second": 15.867,
87
  "eval_steps_per_second": 1.999,
88
  "step": 500
89
+ },
90
+ {
91
+ "epoch": 0.9724294159898337,
92
+ "grad_norm": 2.3597640991210938,
93
+ "learning_rate": 1.0034906514152239e-05,
94
+ "loss": 1.0521656036376954,
95
+ "step": 550
96
+ },
97
+ {
98
+ "epoch": 1.060113818443008,
99
+ "grad_norm": 2.0308382511138916,
100
+ "learning_rate": 8.297905008339677e-06,
101
+ "loss": 0.8026390075683594,
102
+ "step": 600
103
+ },
104
+ {
105
+ "epoch": 1.148516492623902,
106
+ "grad_norm": 2.3317034244537354,
107
+ "learning_rate": 6.612620797547087e-06,
108
+ "loss": 0.6939823150634765,
109
+ "step": 650
110
+ },
111
+ {
112
+ "epoch": 1.2369191668047959,
113
+ "grad_norm": 2.5876853466033936,
114
+ "learning_rate": 5.030260389724447e-06,
115
+ "loss": 0.6835686492919922,
116
+ "step": 700
117
+ },
118
+ {
119
+ "epoch": 1.3253218409856897,
120
+ "grad_norm": 2.342280149459839,
121
+ "learning_rate": 3.598903005150444e-06,
122
+ "loss": 0.6746553039550781,
123
+ "step": 750
124
+ },
125
+ {
126
+ "epoch": 1.4137245151665838,
127
+ "grad_norm": 2.354048728942871,
128
+ "learning_rate": 2.362039713653581e-06,
129
+ "loss": 0.6704821014404296,
130
+ "step": 800
131
+ },
132
+ {
133
+ "epoch": 1.502127189347478,
134
+ "grad_norm": 2.01891827583313,
135
+ "learning_rate": 1.3572519804629537e-06,
136
+ "loss": 0.6500045776367187,
137
+ "step": 850
138
+ },
139
+ {
140
+ "epoch": 1.5905298635283718,
141
+ "grad_norm": 2.010759115219116,
142
+ "learning_rate": 6.150697724044407e-07,
143
+ "loss": 0.6556130981445313,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 1.6789325377092656,
148
+ "grad_norm": 2.3100311756134033,
149
+ "learning_rate": 1.580439203075812e-07,
150
+ "loss": 0.6622157287597656,
151
+ "step": 950
152
+ },
153
+ {
154
+ "epoch": 1.7673352118901597,
155
+ "grad_norm": 2.064932346343994,
156
+ "learning_rate": 6.092342209607083e-11,
157
+ "loss": 0.6651800537109375,
158
+ "step": 1000
159
+ },
160
+ {
161
+ "epoch": 1.7673352118901597,
162
+ "eval_loss": 0.9919272661209106,
163
+ "eval_runtime": 31.6701,
164
+ "eval_samples_per_second": 15.788,
165
+ "eval_steps_per_second": 1.989,
166
+ "step": 1000
167
  }
168
  ],
169
  "logging_steps": 50,
 
178
  "should_evaluate": false,
179
  "should_log": false,
180
  "should_save": true,
181
+ "should_training_stop": true
182
  },
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 3.876769443016704e+16,
187
  "train_batch_size": 1,
188
  "trial_name": null,
189
  "trial_params": null