danield12 commited on
Commit
f50323f
·
verified ·
1 Parent(s): 694a3e4

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f807cf91166999ea3123988f2e2b2ce66a6dad049d9f6c14b3c3202300b1673
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4411c80e204fa52239f0e767750debd1cf11225e340233f1709705364827150e
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.78,
4
- "eval_f1_score": 0.7785897435897436,
5
- "eval_gmean": 0.7961000720231877,
6
- "eval_loss": 0.5531054735183716,
7
- "eval_precision": 0.8186666666666668,
8
- "eval_recall": 0.78,
9
- "eval_runtime": 273.9816,
10
- "eval_samples_per_second": 0.365,
11
- "eval_steps_per_second": 0.047,
12
- "total_flos": 1.6845278653034496e+17,
13
- "train_loss": 0.7047583736590485,
14
- "train_runtime": 28553.3073,
15
- "train_samples_per_second": 0.3,
16
  "train_steps_per_second": 0.005
17
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.85,
4
+ "eval_f1_score": 0.8504053648283457,
5
+ "eval_gmean": 0.8587214900086042,
6
+ "eval_loss": 0.44022461771965027,
7
+ "eval_precision": 0.8646326776395022,
8
+ "eval_recall": 0.85,
9
+ "eval_runtime": 281.2283,
10
+ "eval_samples_per_second": 0.356,
11
+ "eval_steps_per_second": 0.046,
12
+ "total_flos": 8.435821078904832e+16,
13
+ "train_loss": 0.508428317397388,
14
+ "train_runtime": 14877.7814,
15
+ "train_samples_per_second": 0.288,
16
  "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.78,
4
- "eval_f1_score": 0.7785897435897436,
5
- "eval_gmean": 0.7961000720231877,
6
- "eval_loss": 0.5531054735183716,
7
- "eval_precision": 0.8186666666666668,
8
- "eval_recall": 0.78,
9
- "eval_runtime": 273.9816,
10
- "eval_samples_per_second": 0.365,
11
- "eval_steps_per_second": 0.047
12
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.85,
4
+ "eval_f1_score": 0.8504053648283457,
5
+ "eval_gmean": 0.8587214900086042,
6
+ "eval_loss": 0.44022461771965027,
7
+ "eval_precision": 0.8646326776395022,
8
+ "eval_recall": 0.85,
9
+ "eval_runtime": 281.2283,
10
+ "eval_samples_per_second": 0.356,
11
+ "eval_steps_per_second": 0.046
12
  }
runs/Mar26_12-22-53_b3dddff7eb96/events.out.tfevents.1711455775.b3dddff7eb96.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7dd3bfefdb6a27bc808a2fd6c881197546cb71d5fa895b9a55c25e5687aae6
3
+ size 9018
runs/Mar26_12-22-53_b3dddff7eb96/events.out.tfevents.1711470934.b3dddff7eb96.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd109c8b8cd3ceb5536b984dbf67939949accd7cb0508292411f7c12f6128c7
3
+ size 605
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 1.6845278653034496e+17,
4
- "train_loss": 0.7047583736590485,
5
- "train_runtime": 28553.3073,
6
- "train_samples_per_second": 0.3,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 8.435821078904832e+16,
4
+ "train_loss": 0.508428317397388,
5
+ "train_runtime": 14877.7814,
6
+ "train_samples_per_second": 0.288,
7
  "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -3,243 +3,135 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 16,
6
- "global_step": 134,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.06,
13
- "grad_norm": 53.25,
14
- "learning_rate": 1.9090909090909094e-05,
15
- "loss": 1.1167,
16
- "step": 8
17
- },
18
- {
19
- "epoch": 0.12,
20
- "grad_norm": 44.5,
21
- "learning_rate": 1.787878787878788e-05,
22
- "loss": 0.895,
23
- "step": 16
24
- },
25
  {
26
  "epoch": 0.12,
27
- "eval_accuracy": 0.66,
28
- "eval_f1_score": 0.644367816091954,
29
- "eval_gmean": 0.6885077030381411,
30
- "eval_loss": 0.8683984279632568,
31
- "eval_precision": 0.7505555555555555,
32
- "eval_recall": 0.66,
33
- "eval_runtime": 272.9997,
34
- "eval_samples_per_second": 0.366,
35
- "eval_steps_per_second": 0.048,
36
- "step": 16
37
- },
38
- {
39
- "epoch": 0.18,
40
- "grad_norm": 46.5,
41
- "learning_rate": 1.6666666666666667e-05,
42
- "loss": 0.829,
43
- "step": 24
44
  },
45
  {
46
  "epoch": 0.24,
47
- "grad_norm": 30.875,
48
- "learning_rate": 1.5454545454545454e-05,
49
- "loss": 0.747,
50
- "step": 32
51
  },
52
  {
53
  "epoch": 0.24,
54
- "eval_accuracy": 0.72,
55
- "eval_f1_score": 0.7137777777777776,
56
- "eval_gmean": 0.7423479773532891,
57
- "eval_loss": 0.7020312547683716,
58
- "eval_precision": 0.783921568627451,
59
- "eval_recall": 0.72,
60
- "eval_runtime": 273.193,
61
- "eval_samples_per_second": 0.366,
62
- "eval_steps_per_second": 0.048,
63
- "step": 32
64
- },
65
- {
66
- "epoch": 0.3,
67
- "grad_norm": 29.375,
68
- "learning_rate": 1.4242424242424245e-05,
69
- "loss": 0.7173,
70
- "step": 40
71
- },
72
- {
73
- "epoch": 0.36,
74
- "grad_norm": 28.875,
75
- "learning_rate": 1.3030303030303032e-05,
76
- "loss": 0.7169,
77
- "step": 48
78
  },
79
  {
80
  "epoch": 0.36,
81
- "eval_accuracy": 0.74,
82
- "eval_f1_score": 0.7358132045088567,
83
- "eval_gmean": 0.7602742157385604,
84
- "eval_loss": 0.6424999833106995,
85
- "eval_precision": 0.7952083333333333,
86
- "eval_recall": 0.74,
87
- "eval_runtime": 273.7345,
88
- "eval_samples_per_second": 0.365,
89
- "eval_steps_per_second": 0.047,
90
- "step": 48
91
- },
92
- {
93
- "epoch": 0.42,
94
- "grad_norm": 32.25,
95
- "learning_rate": 1.181818181818182e-05,
96
- "loss": 0.6892,
97
- "step": 56
98
  },
99
  {
100
  "epoch": 0.48,
101
- "grad_norm": 35.25,
102
- "learning_rate": 1.0606060606060606e-05,
103
- "loss": 0.7526,
104
- "step": 64
105
  },
106
  {
107
  "epoch": 0.48,
108
- "eval_accuracy": 0.77,
109
- "eval_f1_score": 0.7680401002506266,
110
- "eval_gmean": 0.7871467461661771,
111
- "eval_loss": 0.6016992330551147,
112
- "eval_precision": 0.8126607818411097,
113
- "eval_recall": 0.77,
114
- "eval_runtime": 274.3064,
115
- "eval_samples_per_second": 0.365,
116
- "eval_steps_per_second": 0.047,
117
- "step": 64
118
- },
119
- {
120
- "epoch": 0.54,
121
- "grad_norm": 28.875,
122
- "learning_rate": 9.393939393939396e-06,
123
- "loss": 0.6553,
124
- "step": 72
125
- },
126
- {
127
- "epoch": 0.6,
128
- "grad_norm": 25.625,
129
- "learning_rate": 8.181818181818183e-06,
130
- "loss": 0.5781,
131
- "step": 80
132
  },
133
  {
134
  "epoch": 0.6,
135
- "eval_accuracy": 0.78,
136
- "eval_f1_score": 0.7785897435897436,
137
- "eval_gmean": 0.7961000720231877,
138
- "eval_loss": 0.5736328363418579,
139
- "eval_precision": 0.8186666666666668,
140
- "eval_recall": 0.78,
141
- "eval_runtime": 273.4905,
142
- "eval_samples_per_second": 0.366,
143
- "eval_steps_per_second": 0.048,
144
- "step": 80
145
- },
146
- {
147
- "epoch": 0.66,
148
- "grad_norm": 16.5,
149
- "learning_rate": 6.969696969696971e-06,
150
- "loss": 0.56,
151
- "step": 88
152
  },
153
  {
154
  "epoch": 0.72,
155
- "grad_norm": 32.5,
156
- "learning_rate": 5.7575757575757586e-06,
157
- "loss": 0.6035,
158
- "step": 96
159
  },
160
  {
161
  "epoch": 0.72,
162
- "eval_accuracy": 0.78,
163
- "eval_f1_score": 0.7785897435897436,
164
- "eval_gmean": 0.7961000720231877,
165
- "eval_loss": 0.5596484541893005,
166
- "eval_precision": 0.8186666666666668,
167
- "eval_recall": 0.78,
168
- "eval_runtime": 273.6048,
169
- "eval_samples_per_second": 0.365,
170
- "eval_steps_per_second": 0.048,
171
- "step": 96
172
- },
173
- {
174
- "epoch": 0.78,
175
- "grad_norm": 24.0,
176
- "learning_rate": 4.5454545454545455e-06,
177
- "loss": 0.6456,
178
- "step": 104
179
- },
180
- {
181
- "epoch": 0.84,
182
- "grad_norm": 34.75,
183
- "learning_rate": 3.3333333333333333e-06,
184
- "loss": 0.655,
185
- "step": 112
186
  },
187
  {
188
  "epoch": 0.84,
189
- "eval_accuracy": 0.78,
190
- "eval_f1_score": 0.7785897435897436,
191
- "eval_gmean": 0.7961000720231877,
192
- "eval_loss": 0.5552539229393005,
193
- "eval_precision": 0.8186666666666668,
194
- "eval_recall": 0.78,
195
- "eval_runtime": 273.6051,
196
- "eval_samples_per_second": 0.365,
197
- "eval_steps_per_second": 0.048,
198
- "step": 112
199
- },
200
- {
201
- "epoch": 0.9,
202
- "grad_norm": 25.625,
203
- "learning_rate": 2.1212121212121216e-06,
204
- "loss": 0.5464,
205
- "step": 120
206
  },
207
  {
208
  "epoch": 0.96,
209
- "grad_norm": 16.625,
210
- "learning_rate": 9.090909090909091e-07,
211
- "loss": 0.606,
212
- "step": 128
213
  },
214
  {
215
  "epoch": 0.96,
216
- "eval_accuracy": 0.78,
217
- "eval_f1_score": 0.7785897435897436,
218
- "eval_gmean": 0.7961000720231877,
219
- "eval_loss": 0.5531054735183716,
220
- "eval_precision": 0.8186666666666668,
221
- "eval_recall": 0.78,
222
- "eval_runtime": 273.8068,
223
- "eval_samples_per_second": 0.365,
224
- "eval_steps_per_second": 0.047,
225
- "step": 128
226
  },
227
  {
228
  "epoch": 1.0,
229
- "step": 134,
230
- "total_flos": 1.6845278653034496e+17,
231
- "train_loss": 0.7047583736590485,
232
- "train_runtime": 28553.3073,
233
- "train_samples_per_second": 0.3,
234
  "train_steps_per_second": 0.005
235
  }
236
  ],
237
  "logging_steps": 8,
238
- "max_steps": 134,
239
  "num_input_tokens_seen": 0,
240
  "num_train_epochs": 1,
241
  "save_steps": 500,
242
- "total_flos": 1.6845278653034496e+17,
243
  "train_batch_size": 16,
244
  "trial_name": null,
245
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 16,
6
+ "global_step": 67,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.12,
13
+ "grad_norm": 22.625,
14
+ "learning_rate": 1.8153846153846155e-05,
15
+ "loss": 0.6146,
16
+ "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 0.24,
20
+ "grad_norm": 23.375,
21
+ "learning_rate": 1.5692307692307693e-05,
22
+ "loss": 0.5536,
23
+ "step": 16
24
  },
25
  {
26
  "epoch": 0.24,
27
+ "eval_accuracy": 0.84,
28
+ "eval_f1_score": 0.8403201280512205,
29
+ "eval_gmean": 0.849780720378659,
30
+ "eval_loss": 0.4766210913658142,
31
+ "eval_precision": 0.8575523349436394,
32
+ "eval_recall": 0.84,
33
+ "eval_runtime": 281.1532,
34
+ "eval_samples_per_second": 0.356,
35
+ "eval_steps_per_second": 0.046,
36
+ "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  },
38
  {
39
  "epoch": 0.36,
40
+ "grad_norm": 12.5,
41
+ "learning_rate": 1.3230769230769231e-05,
42
+ "loss": 0.5034,
43
+ "step": 24
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  },
45
  {
46
  "epoch": 0.48,
47
+ "grad_norm": 9.0625,
48
+ "learning_rate": 1.076923076923077e-05,
49
+ "loss": 0.4886,
50
+ "step": 32
51
  },
52
  {
53
  "epoch": 0.48,
54
+ "eval_accuracy": 0.85,
55
+ "eval_f1_score": 0.8504053648283457,
56
+ "eval_gmean": 0.8587214900086042,
57
+ "eval_loss": 0.4527539014816284,
58
+ "eval_precision": 0.8646326776395022,
59
+ "eval_recall": 0.85,
60
+ "eval_runtime": 280.8117,
61
+ "eval_samples_per_second": 0.356,
62
+ "eval_steps_per_second": 0.046,
63
+ "step": 32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  },
65
  {
66
  "epoch": 0.6,
67
+ "grad_norm": 14.0,
68
+ "learning_rate": 8.307692307692309e-06,
69
+ "loss": 0.4542,
70
+ "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
  "epoch": 0.72,
74
+ "grad_norm": 21.625,
75
+ "learning_rate": 5.846153846153847e-06,
76
+ "loss": 0.4781,
77
+ "step": 48
78
  },
79
  {
80
  "epoch": 0.72,
81
+ "eval_accuracy": 0.85,
82
+ "eval_f1_score": 0.8504053648283457,
83
+ "eval_gmean": 0.8587214900086042,
84
+ "eval_loss": 0.44312500953674316,
85
+ "eval_precision": 0.8646326776395022,
86
+ "eval_recall": 0.85,
87
+ "eval_runtime": 280.8661,
88
+ "eval_samples_per_second": 0.356,
89
+ "eval_steps_per_second": 0.046,
90
+ "step": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  },
92
  {
93
  "epoch": 0.84,
94
+ "grad_norm": 16.5,
95
+ "learning_rate": 3.384615384615385e-06,
96
+ "loss": 0.4712,
97
+ "step": 56
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  },
99
  {
100
  "epoch": 0.96,
101
+ "grad_norm": 11.9375,
102
+ "learning_rate": 9.230769230769232e-07,
103
+ "loss": 0.4697,
104
+ "step": 64
105
  },
106
  {
107
  "epoch": 0.96,
108
+ "eval_accuracy": 0.85,
109
+ "eval_f1_score": 0.8504053648283457,
110
+ "eval_gmean": 0.8587214900086042,
111
+ "eval_loss": 0.44022461771965027,
112
+ "eval_precision": 0.8646326776395022,
113
+ "eval_recall": 0.85,
114
+ "eval_runtime": 281.3025,
115
+ "eval_samples_per_second": 0.355,
116
+ "eval_steps_per_second": 0.046,
117
+ "step": 64
118
  },
119
  {
120
  "epoch": 1.0,
121
+ "step": 67,
122
+ "total_flos": 8.435821078904832e+16,
123
+ "train_loss": 0.508428317397388,
124
+ "train_runtime": 14877.7814,
125
+ "train_samples_per_second": 0.288,
126
  "train_steps_per_second": 0.005
127
  }
128
  ],
129
  "logging_steps": 8,
130
+ "max_steps": 67,
131
  "num_input_tokens_seen": 0,
132
  "num_train_epochs": 1,
133
  "save_steps": 500,
134
+ "total_flos": 8.435821078904832e+16,
135
  "train_batch_size": 16,
136
  "trial_name": null,
137
  "trial_params": null