danield12 commited on
Commit
c911bad
·
verified ·
1 Parent(s): f521d8b

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b05aa72c8805edc2083463c9297e7c8886e44910950afe1ea2aab11cd5a1cae
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247fc2b9ff6d183a31785d3f2a3b4287662c9c74c22d00915bfc7758ee61e6a1
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.6666666666666666,
4
- "eval_f1_score": 0.6865530303030304,
5
- "eval_gmean": 0.587807253097444,
6
- "eval_loss": 0.778515636920929,
7
- "eval_precision": 0.7180145485665382,
8
- "eval_recall": 0.6666666666666666,
9
- "eval_runtime": 16.493,
10
- "eval_samples_per_second": 3.638,
11
- "eval_steps_per_second": 0.485,
12
  "total_flos": 2.597706419798016e+16,
13
- "train_loss": 0.793304885643116,
14
- "train_runtime": 5844.8313,
15
- "train_samples_per_second": 3.03,
16
- "train_steps_per_second": 0.024
17
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.7666666666666667,
4
+ "eval_f1_score": 0.7609087348217781,
5
+ "eval_gmean": 0.7486572528549951,
6
+ "eval_loss": 0.5165690183639526,
7
+ "eval_precision": 0.7716666666666666,
8
+ "eval_recall": 0.7666666666666667,
9
+ "eval_runtime": 170.2423,
10
+ "eval_samples_per_second": 0.352,
11
+ "eval_steps_per_second": 0.047,
12
  "total_flos": 2.597706419798016e+16,
13
+ "train_loss": 0.7354364809782609,
14
+ "train_runtime": 6240.8638,
15
+ "train_samples_per_second": 2.838,
16
+ "train_steps_per_second": 0.022
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.6666666666666666,
4
- "eval_f1_score": 0.6865530303030304,
5
- "eval_gmean": 0.587807253097444,
6
- "eval_loss": 0.778515636920929,
7
- "eval_precision": 0.7180145485665382,
8
- "eval_recall": 0.6666666666666666,
9
- "eval_runtime": 16.493,
10
- "eval_samples_per_second": 3.638,
11
- "eval_steps_per_second": 0.485
12
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.7666666666666667,
4
+ "eval_f1_score": 0.7609087348217781,
5
+ "eval_gmean": 0.7486572528549951,
6
+ "eval_loss": 0.5165690183639526,
7
+ "eval_precision": 0.7716666666666666,
8
+ "eval_recall": 0.7666666666666667,
9
+ "eval_runtime": 170.2423,
10
+ "eval_samples_per_second": 0.352,
11
+ "eval_steps_per_second": 0.047
12
  }
runs/Apr02_15-26-42_06ec12e6d0c6/events.out.tfevents.1712071604.06ec12e6d0c6.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15684f75daf1c2a6508fcef6bfd39a4b634483fe679b7ecdcb1c8e0eff8f9ab8
3
+ size 10883
runs/Apr02_15-26-42_06ec12e6d0c6/events.out.tfevents.1712078015.06ec12e6d0c6.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731314460feaaf4b8bae44c2ad5501d610219efe3fdedd3c19b1c050313bcabc
3
+ size 615
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2.597706419798016e+16,
4
- "train_loss": 0.793304885643116,
5
- "train_runtime": 5844.8313,
6
- "train_samples_per_second": 3.03,
7
- "train_steps_per_second": 0.024
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2.597706419798016e+16,
4
+ "train_loss": 0.7354364809782609,
5
+ "train_runtime": 6240.8638,
6
+ "train_samples_per_second": 2.838,
7
+ "train_steps_per_second": 0.022
8
  }
trainer_state.json CHANGED
@@ -10,183 +10,183 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.06,
13
- "grad_norm": 23.125,
14
  "learning_rate": 1.911764705882353e-05,
15
- "loss": 0.9447,
16
  "step": 8
17
  },
18
  {
19
  "epoch": 0.12,
20
- "grad_norm": 29.25,
21
  "learning_rate": 1.7941176470588237e-05,
22
- "loss": 0.958,
23
  "step": 16
24
  },
25
  {
26
  "epoch": 0.17,
27
- "grad_norm": 19.0,
28
  "learning_rate": 1.6764705882352943e-05,
29
- "loss": 0.8606,
30
  "step": 24
31
  },
32
  {
33
  "epoch": 0.2,
34
- "eval_accuracy": 0.65,
35
- "eval_f1_score": 0.6788342559426896,
36
- "eval_gmean": 0.6373565754389918,
37
- "eval_loss": 0.8578125238418579,
38
- "eval_precision": 0.7467592592592592,
39
- "eval_recall": 0.65,
40
- "eval_runtime": 16.3545,
41
- "eval_samples_per_second": 3.669,
42
- "eval_steps_per_second": 0.489,
43
  "step": 28
44
  },
45
  {
46
  "epoch": 0.23,
47
- "grad_norm": 11.1875,
48
  "learning_rate": 1.558823529411765e-05,
49
- "loss": 0.8073,
50
  "step": 32
51
  },
52
  {
53
  "epoch": 0.29,
54
- "grad_norm": 10.125,
55
  "learning_rate": 1.4411764705882353e-05,
56
- "loss": 0.7408,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.35,
61
- "grad_norm": 6.75,
62
  "learning_rate": 1.323529411764706e-05,
63
- "loss": 0.7804,
64
  "step": 48
65
  },
66
  {
67
  "epoch": 0.4,
68
- "grad_norm": 9.6875,
69
  "learning_rate": 1.2058823529411765e-05,
70
- "loss": 0.8548,
71
  "step": 56
72
  },
73
  {
74
  "epoch": 0.4,
75
- "eval_accuracy": 0.6666666666666666,
76
- "eval_f1_score": 0.6865530303030304,
77
- "eval_gmean": 0.587807253097444,
78
- "eval_loss": 0.803906261920929,
79
- "eval_precision": 0.7180145485665382,
80
- "eval_recall": 0.6666666666666666,
81
- "eval_runtime": 16.3685,
82
- "eval_samples_per_second": 3.666,
83
- "eval_steps_per_second": 0.489,
84
  "step": 56
85
  },
86
  {
87
  "epoch": 0.46,
88
- "grad_norm": 7.15625,
89
  "learning_rate": 1.0882352941176471e-05,
90
- "loss": 0.7523,
91
  "step": 64
92
  },
93
  {
94
  "epoch": 0.52,
95
- "grad_norm": 6.90625,
96
  "learning_rate": 9.705882352941177e-06,
97
- "loss": 0.7838,
98
  "step": 72
99
  },
100
  {
101
  "epoch": 0.58,
102
- "grad_norm": 10.125,
103
  "learning_rate": 8.529411764705883e-06,
104
- "loss": 0.7783,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.61,
109
- "eval_accuracy": 0.6666666666666666,
110
- "eval_f1_score": 0.6865530303030304,
111
- "eval_gmean": 0.587807253097444,
112
- "eval_loss": 0.7865885496139526,
113
- "eval_precision": 0.7180145485665382,
114
- "eval_recall": 0.6666666666666666,
115
- "eval_runtime": 16.3,
116
- "eval_samples_per_second": 3.681,
117
- "eval_steps_per_second": 0.491,
118
  "step": 84
119
  },
120
  {
121
  "epoch": 0.64,
122
- "grad_norm": 5.65625,
123
  "learning_rate": 7.352941176470589e-06,
124
- "loss": 0.7328,
125
  "step": 88
126
  },
127
  {
128
  "epoch": 0.69,
129
- "grad_norm": 6.25,
130
  "learning_rate": 6.176470588235295e-06,
131
- "loss": 0.6923,
132
  "step": 96
133
  },
134
  {
135
  "epoch": 0.75,
136
- "grad_norm": 11.375,
137
  "learning_rate": 5e-06,
138
- "loss": 0.7355,
139
  "step": 104
140
  },
141
  {
142
  "epoch": 0.81,
143
- "grad_norm": 10.6875,
144
  "learning_rate": 3.8235294117647055e-06,
145
- "loss": 0.783,
146
  "step": 112
147
  },
148
  {
149
  "epoch": 0.81,
150
- "eval_accuracy": 0.6666666666666666,
151
- "eval_f1_score": 0.6865530303030304,
152
- "eval_gmean": 0.587807253097444,
153
- "eval_loss": 0.7805989384651184,
154
- "eval_precision": 0.7180145485665382,
155
- "eval_recall": 0.6666666666666666,
156
- "eval_runtime": 16.5911,
157
- "eval_samples_per_second": 3.616,
158
- "eval_steps_per_second": 0.482,
159
  "step": 112
160
  },
161
  {
162
  "epoch": 0.87,
163
- "grad_norm": 8.875,
164
  "learning_rate": 2.647058823529412e-06,
165
- "loss": 0.7344,
166
  "step": 120
167
  },
168
  {
169
  "epoch": 0.92,
170
- "grad_norm": 5.28125,
171
  "learning_rate": 1.4705882352941177e-06,
172
- "loss": 0.7383,
173
  "step": 128
174
  },
175
  {
176
  "epoch": 0.98,
177
- "grad_norm": 8.125,
178
  "learning_rate": 2.9411764705882356e-07,
179
- "loss": 0.8187,
180
  "step": 136
181
  },
182
  {
183
  "epoch": 1.0,
184
  "step": 138,
185
  "total_flos": 2.597706419798016e+16,
186
- "train_loss": 0.793304885643116,
187
- "train_runtime": 5844.8313,
188
- "train_samples_per_second": 3.03,
189
- "train_steps_per_second": 0.024
190
  }
191
  ],
192
  "logging_steps": 8,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.06,
13
+ "grad_norm": 4.96875,
14
  "learning_rate": 1.911764705882353e-05,
15
+ "loss": 0.7457,
16
  "step": 8
17
  },
18
  {
19
  "epoch": 0.12,
20
+ "grad_norm": 12.75,
21
  "learning_rate": 1.7941176470588237e-05,
22
+ "loss": 0.7997,
23
  "step": 16
24
  },
25
  {
26
  "epoch": 0.17,
27
+ "grad_norm": 7.5625,
28
  "learning_rate": 1.6764705882352943e-05,
29
+ "loss": 0.7616,
30
  "step": 24
31
  },
32
  {
33
  "epoch": 0.2,
34
+ "eval_accuracy": 0.7666666666666667,
35
+ "eval_f1_score": 0.7609087348217781,
36
+ "eval_gmean": 0.7486572528549951,
37
+ "eval_loss": 0.5174153447151184,
38
+ "eval_precision": 0.7716666666666666,
39
+ "eval_recall": 0.7666666666666667,
40
+ "eval_runtime": 170.072,
41
+ "eval_samples_per_second": 0.353,
42
+ "eval_steps_per_second": 0.047,
43
  "step": 28
44
  },
45
  {
46
  "epoch": 0.23,
47
+ "grad_norm": 18.0,
48
  "learning_rate": 1.558823529411765e-05,
49
+ "loss": 0.7458,
50
  "step": 32
51
  },
52
  {
53
  "epoch": 0.29,
54
+ "grad_norm": 7.78125,
55
  "learning_rate": 1.4411764705882353e-05,
56
+ "loss": 0.691,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.35,
61
+ "grad_norm": 5.78125,
62
  "learning_rate": 1.323529411764706e-05,
63
+ "loss": 0.7286,
64
  "step": 48
65
  },
66
  {
67
  "epoch": 0.4,
68
+ "grad_norm": 6.40625,
69
  "learning_rate": 1.2058823529411765e-05,
70
+ "loss": 0.8156,
71
  "step": 56
72
  },
73
  {
74
  "epoch": 0.4,
75
+ "eval_accuracy": 0.7666666666666667,
76
+ "eval_f1_score": 0.7609087348217781,
77
+ "eval_gmean": 0.7486572528549951,
78
+ "eval_loss": 0.5176106691360474,
79
+ "eval_precision": 0.7716666666666666,
80
+ "eval_recall": 0.7666666666666667,
81
+ "eval_runtime": 170.1289,
82
+ "eval_samples_per_second": 0.353,
83
+ "eval_steps_per_second": 0.047,
84
  "step": 56
85
  },
86
  {
87
  "epoch": 0.46,
88
+ "grad_norm": 10.3125,
89
  "learning_rate": 1.0882352941176471e-05,
90
+ "loss": 0.7198,
91
  "step": 64
92
  },
93
  {
94
  "epoch": 0.52,
95
+ "grad_norm": 9.0625,
96
  "learning_rate": 9.705882352941177e-06,
97
+ "loss": 0.7421,
98
  "step": 72
99
  },
100
  {
101
  "epoch": 0.58,
102
+ "grad_norm": 6.625,
103
  "learning_rate": 8.529411764705883e-06,
104
+ "loss": 0.7488,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.61,
109
+ "eval_accuracy": 0.7666666666666667,
110
+ "eval_f1_score": 0.7609087348217781,
111
+ "eval_gmean": 0.7486572528549951,
112
+ "eval_loss": 0.5164387822151184,
113
+ "eval_precision": 0.7716666666666666,
114
+ "eval_recall": 0.7666666666666667,
115
+ "eval_runtime": 170.831,
116
+ "eval_samples_per_second": 0.351,
117
+ "eval_steps_per_second": 0.047,
118
  "step": 84
119
  },
120
  {
121
  "epoch": 0.64,
122
+ "grad_norm": 6.125,
123
  "learning_rate": 7.352941176470589e-06,
124
+ "loss": 0.7045,
125
  "step": 88
126
  },
127
  {
128
  "epoch": 0.69,
129
+ "grad_norm": 4.625,
130
  "learning_rate": 6.176470588235295e-06,
131
+ "loss": 0.6564,
132
  "step": 96
133
  },
134
  {
135
  "epoch": 0.75,
136
+ "grad_norm": 8.3125,
137
  "learning_rate": 5e-06,
138
+ "loss": 0.7003,
139
  "step": 104
140
  },
141
  {
142
  "epoch": 0.81,
143
+ "grad_norm": 12.9375,
144
  "learning_rate": 3.8235294117647055e-06,
145
+ "loss": 0.7528,
146
  "step": 112
147
  },
148
  {
149
  "epoch": 0.81,
150
+ "eval_accuracy": 0.7666666666666667,
151
+ "eval_f1_score": 0.7609087348217781,
152
+ "eval_gmean": 0.7486572528549951,
153
+ "eval_loss": 0.5166015625,
154
+ "eval_precision": 0.7716666666666666,
155
+ "eval_recall": 0.7666666666666667,
156
+ "eval_runtime": 170.3596,
157
+ "eval_samples_per_second": 0.352,
158
+ "eval_steps_per_second": 0.047,
159
  "step": 112
160
  },
161
  {
162
  "epoch": 0.87,
163
+ "grad_norm": 7.46875,
164
  "learning_rate": 2.647058823529412e-06,
165
+ "loss": 0.7045,
166
  "step": 120
167
  },
168
  {
169
  "epoch": 0.92,
170
+ "grad_norm": 5.8125,
171
  "learning_rate": 1.4705882352941177e-06,
172
+ "loss": 0.7029,
173
  "step": 128
174
  },
175
  {
176
  "epoch": 0.98,
177
+ "grad_norm": 10.1875,
178
  "learning_rate": 2.9411764705882356e-07,
179
+ "loss": 0.7869,
180
  "step": 136
181
  },
182
  {
183
  "epoch": 1.0,
184
  "step": 138,
185
  "total_flos": 2.597706419798016e+16,
186
+ "train_loss": 0.7354364809782609,
187
+ "train_runtime": 6240.8638,
188
+ "train_samples_per_second": 2.838,
189
+ "train_steps_per_second": 0.022
190
  }
191
  ],
192
  "logging_steps": 8,