danield12 committed on
Commit
bb15a66
·
verified ·
1 Parent(s): f32d01f

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c165d0b62d01dd989a09b638f70d1e0e4cc65d4e4e67081b177d4fbad18e25a
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93aa0cf7d99ad26d47d8643e22d1ffc2cdf84ec975fa62bf6a3744af6d78bffa
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.87,
4
- "eval_f1_score": 0.8705157155935307,
5
- "eval_gmean": 0.8715093639362524,
6
- "eval_loss": 0.26673829555511475,
7
- "eval_precision": 0.8726060606060605,
8
- "eval_recall": 0.87,
9
- "eval_runtime": 328.4912,
10
- "eval_samples_per_second": 0.304,
11
- "eval_steps_per_second": 0.04,
12
- "total_flos": 1.0505428534296576e+17,
13
- "train_loss": 0.3696013659965701,
14
- "train_runtime": 20858.2303,
15
- "train_samples_per_second": 0.252,
16
- "train_steps_per_second": 0.004
17
  }
 
1
  {
2
+ "epoch": 0.99,
3
+ "eval_accuracy": 0.84,
4
+ "eval_f1_score": 0.8423376623376623,
5
+ "eval_gmean": 0.8605301007689224,
6
+ "eval_loss": 0.3891015648841858,
7
+ "eval_precision": 0.8687999999999999,
8
+ "eval_recall": 0.84,
9
+ "eval_runtime": 156.3274,
10
+ "eval_samples_per_second": 0.32,
11
+ "eval_steps_per_second": 0.045,
12
+ "total_flos": 1.171238676922368e+17,
13
+ "train_loss": 0.9641927083333334,
14
+ "train_runtime": 18930.2986,
15
+ "train_samples_per_second": 0.317,
16
+ "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.87,
4
- "eval_f1_score": 0.8705157155935307,
5
- "eval_gmean": 0.8715093639362524,
6
- "eval_loss": 0.26673829555511475,
7
- "eval_precision": 0.8726060606060605,
8
- "eval_recall": 0.87,
9
- "eval_runtime": 328.4912,
10
- "eval_samples_per_second": 0.304,
11
- "eval_steps_per_second": 0.04
12
  }
 
1
  {
2
+ "epoch": 0.99,
3
+ "eval_accuracy": 0.84,
4
+ "eval_f1_score": 0.8423376623376623,
5
+ "eval_gmean": 0.8605301007689224,
6
+ "eval_loss": 0.3891015648841858,
7
+ "eval_precision": 0.8687999999999999,
8
+ "eval_recall": 0.84,
9
+ "eval_runtime": 156.3274,
10
+ "eval_samples_per_second": 0.32,
11
+ "eval_steps_per_second": 0.045
12
  }
runs/Mar31_00-08-10_a67b165e443d/events.out.tfevents.1711843692.a67b165e443d.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7a1a0cb5f8040ad8c4dcd07d9a78b1fe78c847cbe32b10144f0fbf81c6bec3
3
+ size 4919
runs/Mar31_00-13-11_a67b165e443d/events.out.tfevents.1711843992.a67b165e443d.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32887d276cf285148292e7134729f00a457bddcb533578fc2b61d2bbd63f61dd
3
+ size 9095
runs/Mar31_00-13-11_a67b165e443d/events.out.tfevents.1711863078.a67b165e443d.34.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62af20f2ac529efff6289f4571b270ba4b39f7fb509e997638675b8df412ad02
3
+ size 605
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 1.0505428534296576e+17,
4
- "train_loss": 0.3696013659965701,
5
- "train_runtime": 20858.2303,
6
- "train_samples_per_second": 0.252,
7
- "train_steps_per_second": 0.004
8
  }
 
1
  {
2
+ "epoch": 0.99,
3
+ "total_flos": 1.171238676922368e+17,
4
+ "train_loss": 0.9641927083333334,
5
+ "train_runtime": 18930.2986,
6
+ "train_samples_per_second": 0.317,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -1,229 +1,145 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9969604863221885,
5
- "eval_steps": 8,
6
- "global_step": 82,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.1,
13
- "grad_norm": 6.875,
14
- "learning_rate": 1.8500000000000002e-05,
15
- "loss": 0.3831,
16
  "step": 8
17
  },
18
  {
19
- "epoch": 0.1,
20
- "eval_accuracy": 0.87,
21
- "eval_f1_score": 0.8705157155935307,
22
- "eval_gmean": 0.8715093639362524,
23
- "eval_loss": 0.2664209008216858,
24
- "eval_precision": 0.8726060606060605,
25
- "eval_recall": 0.87,
26
- "eval_runtime": 325.1061,
27
- "eval_samples_per_second": 0.308,
28
- "eval_steps_per_second": 0.04,
29
- "step": 8
30
- },
31
- {
32
- "epoch": 0.19,
33
- "grad_norm": 7.4375,
34
- "learning_rate": 1.65e-05,
35
- "loss": 0.3261,
36
  "step": 16
37
  },
38
  {
39
- "epoch": 0.19,
40
- "eval_accuracy": 0.87,
41
- "eval_f1_score": 0.8705157155935307,
42
- "eval_gmean": 0.8715093639362524,
43
- "eval_loss": 0.26719239354133606,
44
- "eval_precision": 0.8726060606060605,
45
- "eval_recall": 0.87,
46
- "eval_runtime": 325.6461,
47
- "eval_samples_per_second": 0.307,
48
- "eval_steps_per_second": 0.04,
49
- "step": 16
50
- },
51
- {
52
- "epoch": 0.29,
53
- "grad_norm": 10.5625,
54
- "learning_rate": 1.45e-05,
55
- "loss": 0.3721,
56
- "step": 24
57
- },
58
- {
59
- "epoch": 0.29,
60
- "eval_accuracy": 0.87,
61
- "eval_f1_score": 0.8705157155935307,
62
- "eval_gmean": 0.8715093639362524,
63
- "eval_loss": 0.2671875059604645,
64
- "eval_precision": 0.8726060606060605,
65
- "eval_recall": 0.87,
66
- "eval_runtime": 325.3707,
67
- "eval_samples_per_second": 0.307,
68
- "eval_steps_per_second": 0.04,
69
  "step": 24
70
  },
71
  {
72
- "epoch": 0.39,
73
- "grad_norm": 9.5,
74
- "learning_rate": 1.25e-05,
75
- "loss": 0.3582,
76
- "step": 32
 
 
 
 
 
 
77
  },
78
  {
79
- "epoch": 0.39,
80
- "eval_accuracy": 0.87,
81
- "eval_f1_score": 0.8705157155935307,
82
- "eval_gmean": 0.8715093639362524,
83
- "eval_loss": 0.2667529284954071,
84
- "eval_precision": 0.8726060606060605,
85
- "eval_recall": 0.87,
86
- "eval_runtime": 325.8406,
87
- "eval_samples_per_second": 0.307,
88
- "eval_steps_per_second": 0.04,
89
  "step": 32
90
  },
91
  {
92
- "epoch": 0.49,
93
- "grad_norm": 5.84375,
94
- "learning_rate": 1.0500000000000001e-05,
95
- "loss": 0.4175,
96
  "step": 40
97
  },
98
  {
99
- "epoch": 0.49,
100
- "eval_accuracy": 0.87,
101
- "eval_f1_score": 0.8705157155935307,
102
- "eval_gmean": 0.8715093639362524,
103
- "eval_loss": 0.26649415493011475,
104
- "eval_precision": 0.8726060606060605,
105
- "eval_recall": 0.87,
106
- "eval_runtime": 325.6291,
107
- "eval_samples_per_second": 0.307,
108
- "eval_steps_per_second": 0.04,
109
- "step": 40
110
- },
111
- {
112
- "epoch": 0.58,
113
- "grad_norm": 4.90625,
114
- "learning_rate": 8.5e-06,
115
- "loss": 0.2994,
116
- "step": 48
117
- },
118
- {
119
- "epoch": 0.58,
120
- "eval_accuracy": 0.87,
121
- "eval_f1_score": 0.8705157155935307,
122
- "eval_gmean": 0.8715093639362524,
123
- "eval_loss": 0.2669140696525574,
124
- "eval_precision": 0.8726060606060605,
125
- "eval_recall": 0.87,
126
- "eval_runtime": 325.6436,
127
- "eval_samples_per_second": 0.307,
128
- "eval_steps_per_second": 0.04,
129
  "step": 48
130
  },
131
  {
132
- "epoch": 0.68,
133
- "grad_norm": 7.375,
134
- "learning_rate": 6.5000000000000004e-06,
135
- "loss": 0.3524,
136
  "step": 56
137
  },
138
  {
139
- "epoch": 0.68,
140
- "eval_accuracy": 0.87,
141
- "eval_f1_score": 0.8705157155935307,
142
- "eval_gmean": 0.8715093639362524,
143
- "eval_loss": 0.2667480409145355,
144
- "eval_precision": 0.8726060606060605,
145
- "eval_recall": 0.87,
146
- "eval_runtime": 328.7964,
147
- "eval_samples_per_second": 0.304,
148
- "eval_steps_per_second": 0.04,
149
  "step": 56
150
  },
151
  {
152
- "epoch": 0.78,
153
- "grad_norm": 5.40625,
154
- "learning_rate": 4.5e-06,
155
- "loss": 0.3961,
156
- "step": 64
157
- },
158
- {
159
- "epoch": 0.78,
160
- "eval_accuracy": 0.87,
161
- "eval_f1_score": 0.8705157155935307,
162
- "eval_gmean": 0.8715093639362524,
163
- "eval_loss": 0.26673829555511475,
164
- "eval_precision": 0.8726060606060605,
165
- "eval_recall": 0.87,
166
- "eval_runtime": 327.9706,
167
- "eval_samples_per_second": 0.305,
168
- "eval_steps_per_second": 0.04,
169
  "step": 64
170
  },
171
  {
172
- "epoch": 0.88,
173
- "grad_norm": 8.3125,
174
- "learning_rate": 2.5e-06,
175
- "loss": 0.4341,
176
- "step": 72
177
- },
178
- {
179
- "epoch": 0.88,
180
- "eval_accuracy": 0.87,
181
- "eval_f1_score": 0.8705157155935307,
182
- "eval_gmean": 0.8715093639362524,
183
- "eval_loss": 0.26673829555511475,
184
- "eval_precision": 0.8726060606060605,
185
- "eval_recall": 0.87,
186
- "eval_runtime": 328.7312,
187
- "eval_samples_per_second": 0.304,
188
- "eval_steps_per_second": 0.04,
189
  "step": 72
190
  },
191
  {
192
- "epoch": 0.97,
193
- "grad_norm": 6.3125,
194
- "learning_rate": 5.000000000000001e-07,
195
- "loss": 0.3743,
196
- "step": 80
197
- },
198
- {
199
- "epoch": 0.97,
200
- "eval_accuracy": 0.87,
201
- "eval_f1_score": 0.8705157155935307,
202
- "eval_gmean": 0.8715093639362524,
203
- "eval_loss": 0.26673829555511475,
204
- "eval_precision": 0.8726060606060605,
205
- "eval_recall": 0.87,
206
- "eval_runtime": 328.568,
207
- "eval_samples_per_second": 0.304,
208
- "eval_steps_per_second": 0.04,
209
  "step": 80
210
  },
211
  {
212
- "epoch": 1.0,
213
- "step": 82,
214
- "total_flos": 1.0505428534296576e+17,
215
- "train_loss": 0.3696013659965701,
216
- "train_runtime": 20858.2303,
217
- "train_samples_per_second": 0.252,
218
- "train_steps_per_second": 0.004
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  }
220
  ],
221
  "logging_steps": 8,
222
- "max_steps": 82,
223
  "num_input_tokens_seen": 0,
224
  "num_train_epochs": 1,
225
  "save_steps": 500,
226
- "total_flos": 1.0505428534296576e+17,
227
  "train_batch_size": 16,
228
  "trial_name": null,
229
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.992,
5
+ "eval_steps": 28,
6
+ "global_step": 93,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.09,
13
+ "grad_norm": 35.25,
14
+ "learning_rate": 1.8681318681318682e-05,
15
+ "loss": 1.1454,
16
  "step": 8
17
  },
18
  {
19
+ "epoch": 0.17,
20
+ "grad_norm": 36.5,
21
+ "learning_rate": 1.6923076923076924e-05,
22
+ "loss": 1.1827,
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "step": 16
24
  },
25
  {
26
+ "epoch": 0.26,
27
+ "grad_norm": 34.0,
28
+ "learning_rate": 1.5164835164835166e-05,
29
+ "loss": 1.1057,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  "step": 24
31
  },
32
  {
33
+ "epoch": 0.3,
34
+ "eval_accuracy": 0.82,
35
+ "eval_f1_score": 0.8225454545454545,
36
+ "eval_gmean": 0.844292130744656,
37
+ "eval_loss": 0.4087499976158142,
38
+ "eval_precision": 0.8572435897435898,
39
+ "eval_recall": 0.82,
40
+ "eval_runtime": 156.306,
41
+ "eval_samples_per_second": 0.32,
42
+ "eval_steps_per_second": 0.045,
43
+ "step": 28
44
  },
45
  {
46
+ "epoch": 0.34,
47
+ "grad_norm": 30.75,
48
+ "learning_rate": 1.3406593406593406e-05,
49
+ "loss": 0.9439,
 
 
 
 
 
 
50
  "step": 32
51
  },
52
  {
53
+ "epoch": 0.43,
54
+ "grad_norm": 35.25,
55
+ "learning_rate": 1.164835164835165e-05,
56
+ "loss": 0.9382,
57
  "step": 40
58
  },
59
  {
60
+ "epoch": 0.51,
61
+ "grad_norm": 35.25,
62
+ "learning_rate": 9.890109890109892e-06,
63
+ "loss": 0.9136,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  "step": 48
65
  },
66
  {
67
+ "epoch": 0.6,
68
+ "grad_norm": 35.75,
69
+ "learning_rate": 8.131868131868132e-06,
70
+ "loss": 0.8892,
71
  "step": 56
72
  },
73
  {
74
+ "epoch": 0.6,
75
+ "eval_accuracy": 0.84,
76
+ "eval_f1_score": 0.8423376623376623,
77
+ "eval_gmean": 0.8605301007689224,
78
+ "eval_loss": 0.39445313811302185,
79
+ "eval_precision": 0.8687999999999999,
80
+ "eval_recall": 0.84,
81
+ "eval_runtime": 156.2149,
82
+ "eval_samples_per_second": 0.32,
83
+ "eval_steps_per_second": 0.045,
84
  "step": 56
85
  },
86
  {
87
+ "epoch": 0.68,
88
+ "grad_norm": 25.0,
89
+ "learning_rate": 6.373626373626373e-06,
90
+ "loss": 0.9037,
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  "step": 64
92
  },
93
  {
94
+ "epoch": 0.77,
95
+ "grad_norm": 19.25,
96
+ "learning_rate": 4.615384615384616e-06,
97
+ "loss": 0.8933,
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  "step": 72
99
  },
100
  {
101
+ "epoch": 0.85,
102
+ "grad_norm": 21.0,
103
+ "learning_rate": 2.8571428571428573e-06,
104
+ "loss": 0.8473,
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "step": 80
106
  },
107
  {
108
+ "epoch": 0.9,
109
+ "eval_accuracy": 0.84,
110
+ "eval_f1_score": 0.8423376623376623,
111
+ "eval_gmean": 0.8605301007689224,
112
+ "eval_loss": 0.3901953101158142,
113
+ "eval_precision": 0.8687999999999999,
114
+ "eval_recall": 0.84,
115
+ "eval_runtime": 155.9323,
116
+ "eval_samples_per_second": 0.321,
117
+ "eval_steps_per_second": 0.045,
118
+ "step": 84
119
+ },
120
+ {
121
+ "epoch": 0.94,
122
+ "grad_norm": 32.5,
123
+ "learning_rate": 1.098901098901099e-06,
124
+ "loss": 0.8776,
125
+ "step": 88
126
+ },
127
+ {
128
+ "epoch": 0.99,
129
+ "step": 93,
130
+ "total_flos": 1.171238676922368e+17,
131
+ "train_loss": 0.9641927083333334,
132
+ "train_runtime": 18930.2986,
133
+ "train_samples_per_second": 0.317,
134
+ "train_steps_per_second": 0.005
135
  }
136
  ],
137
  "logging_steps": 8,
138
+ "max_steps": 93,
139
  "num_input_tokens_seen": 0,
140
  "num_train_epochs": 1,
141
  "save_steps": 500,
142
+ "total_flos": 1.171238676922368e+17,
143
  "train_batch_size": 16,
144
  "trial_name": null,
145
  "trial_params": null