danield12 committed on
Commit
f00b7cc
·
verified ·
1 Parent(s): 74aa76d

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247fc2b9ff6d183a31785d3f2a3b4287662c9c74c22d00915bfc7758ee61e6a1
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e2a19914eaa14513f860a0e901e2e1f0a0a709a4d92ea72ec0c3c855b69fd8
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.7666666666666667,
4
- "eval_f1_score": 0.7609087348217781,
5
- "eval_gmean": 0.7486572528549951,
6
- "eval_loss": 0.5165690183639526,
7
- "eval_precision": 0.7716666666666666,
8
- "eval_recall": 0.7666666666666667,
9
- "eval_runtime": 170.2423,
10
- "eval_samples_per_second": 0.352,
11
- "eval_steps_per_second": 0.047,
12
- "total_flos": 2.597706419798016e+16,
13
- "train_loss": 0.7354364809782609,
14
- "train_runtime": 6240.8638,
15
- "train_samples_per_second": 2.838,
16
- "train_steps_per_second": 0.022
17
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.8166666666666667,
4
+ "eval_f1_score": 0.8080459770114942,
5
+ "eval_gmean": 0.7501820727030913,
6
+ "eval_loss": 0.5850911736488342,
7
+ "eval_precision": 0.8133022774327122,
8
+ "eval_recall": 0.8166666666666667,
9
+ "eval_runtime": 175.3378,
10
+ "eval_samples_per_second": 0.342,
11
+ "eval_steps_per_second": 0.046,
12
+ "total_flos": 8.435821078904832e+16,
13
+ "train_loss": 0.405215135261194,
14
+ "train_runtime": 12957.5785,
15
+ "train_samples_per_second": 0.33,
16
+ "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.7666666666666667,
4
- "eval_f1_score": 0.7609087348217781,
5
- "eval_gmean": 0.7486572528549951,
6
- "eval_loss": 0.5165690183639526,
7
- "eval_precision": 0.7716666666666666,
8
- "eval_recall": 0.7666666666666667,
9
- "eval_runtime": 170.2423,
10
- "eval_samples_per_second": 0.352,
11
- "eval_steps_per_second": 0.047
12
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.8166666666666667,
4
+ "eval_f1_score": 0.8080459770114942,
5
+ "eval_gmean": 0.7501820727030913,
6
+ "eval_loss": 0.5850911736488342,
7
+ "eval_precision": 0.8133022774327122,
8
+ "eval_recall": 0.8166666666666667,
9
+ "eval_runtime": 175.3378,
10
+ "eval_samples_per_second": 0.342,
11
+ "eval_steps_per_second": 0.046
12
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 2.597706419798016e+16,
4
- "train_loss": 0.7354364809782609,
5
- "train_runtime": 6240.8638,
6
- "train_samples_per_second": 2.838,
7
- "train_steps_per_second": 0.022
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 8.435821078904832e+16,
4
+ "train_loss": 0.405215135261194,
5
+ "train_runtime": 12957.5785,
6
+ "train_samples_per_second": 0.33,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -1,201 +1,112 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9963898916967509,
5
  "eval_steps": 28,
6
- "global_step": 138,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06,
13
- "grad_norm": 4.96875,
14
- "learning_rate": 1.911764705882353e-05,
15
- "loss": 0.7457,
16
  "step": 8
17
  },
18
  {
19
- "epoch": 0.12,
20
- "grad_norm": 12.75,
21
- "learning_rate": 1.7941176470588237e-05,
22
- "loss": 0.7997,
23
  "step": 16
24
  },
25
  {
26
- "epoch": 0.17,
27
- "grad_norm": 7.5625,
28
- "learning_rate": 1.6764705882352943e-05,
29
- "loss": 0.7616,
30
  "step": 24
31
  },
32
  {
33
- "epoch": 0.2,
34
- "eval_accuracy": 0.7666666666666667,
35
- "eval_f1_score": 0.7609087348217781,
36
- "eval_gmean": 0.7486572528549951,
37
- "eval_loss": 0.5174153447151184,
38
- "eval_precision": 0.7716666666666666,
39
- "eval_recall": 0.7666666666666667,
40
- "eval_runtime": 170.072,
41
- "eval_samples_per_second": 0.353,
42
- "eval_steps_per_second": 0.047,
43
  "step": 28
44
  },
45
  {
46
- "epoch": 0.23,
47
- "grad_norm": 18.0,
48
- "learning_rate": 1.558823529411765e-05,
49
- "loss": 0.7458,
50
  "step": 32
51
  },
52
  {
53
- "epoch": 0.29,
54
- "grad_norm": 7.78125,
55
- "learning_rate": 1.4411764705882353e-05,
56
- "loss": 0.691,
57
  "step": 40
58
  },
59
  {
60
- "epoch": 0.35,
61
- "grad_norm": 5.78125,
62
- "learning_rate": 1.323529411764706e-05,
63
- "loss": 0.7286,
64
  "step": 48
65
  },
66
  {
67
- "epoch": 0.4,
68
- "grad_norm": 6.40625,
69
- "learning_rate": 1.2058823529411765e-05,
70
- "loss": 0.8156,
71
  "step": 56
72
  },
73
  {
74
- "epoch": 0.4,
75
- "eval_accuracy": 0.7666666666666667,
76
- "eval_f1_score": 0.7609087348217781,
77
- "eval_gmean": 0.7486572528549951,
78
- "eval_loss": 0.5176106691360474,
79
- "eval_precision": 0.7716666666666666,
80
- "eval_recall": 0.7666666666666667,
81
- "eval_runtime": 170.1289,
82
- "eval_samples_per_second": 0.353,
83
- "eval_steps_per_second": 0.047,
84
  "step": 56
85
  },
86
  {
87
- "epoch": 0.46,
88
- "grad_norm": 10.3125,
89
- "learning_rate": 1.0882352941176471e-05,
90
- "loss": 0.7198,
91
  "step": 64
92
  },
93
- {
94
- "epoch": 0.52,
95
- "grad_norm": 9.0625,
96
- "learning_rate": 9.705882352941177e-06,
97
- "loss": 0.7421,
98
- "step": 72
99
- },
100
- {
101
- "epoch": 0.58,
102
- "grad_norm": 6.625,
103
- "learning_rate": 8.529411764705883e-06,
104
- "loss": 0.7488,
105
- "step": 80
106
- },
107
- {
108
- "epoch": 0.61,
109
- "eval_accuracy": 0.7666666666666667,
110
- "eval_f1_score": 0.7609087348217781,
111
- "eval_gmean": 0.7486572528549951,
112
- "eval_loss": 0.5164387822151184,
113
- "eval_precision": 0.7716666666666666,
114
- "eval_recall": 0.7666666666666667,
115
- "eval_runtime": 170.831,
116
- "eval_samples_per_second": 0.351,
117
- "eval_steps_per_second": 0.047,
118
- "step": 84
119
- },
120
- {
121
- "epoch": 0.64,
122
- "grad_norm": 6.125,
123
- "learning_rate": 7.352941176470589e-06,
124
- "loss": 0.7045,
125
- "step": 88
126
- },
127
- {
128
- "epoch": 0.69,
129
- "grad_norm": 4.625,
130
- "learning_rate": 6.176470588235295e-06,
131
- "loss": 0.6564,
132
- "step": 96
133
- },
134
- {
135
- "epoch": 0.75,
136
- "grad_norm": 8.3125,
137
- "learning_rate": 5e-06,
138
- "loss": 0.7003,
139
- "step": 104
140
- },
141
- {
142
- "epoch": 0.81,
143
- "grad_norm": 12.9375,
144
- "learning_rate": 3.8235294117647055e-06,
145
- "loss": 0.7528,
146
- "step": 112
147
- },
148
- {
149
- "epoch": 0.81,
150
- "eval_accuracy": 0.7666666666666667,
151
- "eval_f1_score": 0.7609087348217781,
152
- "eval_gmean": 0.7486572528549951,
153
- "eval_loss": 0.5166015625,
154
- "eval_precision": 0.7716666666666666,
155
- "eval_recall": 0.7666666666666667,
156
- "eval_runtime": 170.3596,
157
- "eval_samples_per_second": 0.352,
158
- "eval_steps_per_second": 0.047,
159
- "step": 112
160
- },
161
- {
162
- "epoch": 0.87,
163
- "grad_norm": 7.46875,
164
- "learning_rate": 2.647058823529412e-06,
165
- "loss": 0.7045,
166
- "step": 120
167
- },
168
- {
169
- "epoch": 0.92,
170
- "grad_norm": 5.8125,
171
- "learning_rate": 1.4705882352941177e-06,
172
- "loss": 0.7029,
173
- "step": 128
174
- },
175
- {
176
- "epoch": 0.98,
177
- "grad_norm": 10.1875,
178
- "learning_rate": 2.9411764705882356e-07,
179
- "loss": 0.7869,
180
- "step": 136
181
- },
182
  {
183
  "epoch": 1.0,
184
- "step": 138,
185
- "total_flos": 2.597706419798016e+16,
186
- "train_loss": 0.7354364809782609,
187
- "train_runtime": 6240.8638,
188
- "train_samples_per_second": 2.838,
189
- "train_steps_per_second": 0.022
190
  }
191
  ],
192
  "logging_steps": 8,
193
- "max_steps": 138,
194
  "num_input_tokens_seen": 0,
195
  "num_train_epochs": 1,
196
  "save_steps": 500,
197
- "total_flos": 2.597706419798016e+16,
198
- "train_batch_size": 32,
199
  "trial_name": null,
200
  "trial_params": null
201
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 28,
6
+ "global_step": 67,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.12,
13
+ "grad_norm": 9.4375,
14
+ "learning_rate": 1.8153846153846155e-05,
15
+ "loss": 0.4282,
16
  "step": 8
17
  },
18
  {
19
+ "epoch": 0.24,
20
+ "grad_norm": 10.5,
21
+ "learning_rate": 1.5692307692307693e-05,
22
+ "loss": 0.4335,
23
  "step": 16
24
  },
25
  {
26
+ "epoch": 0.36,
27
+ "grad_norm": 5.03125,
28
+ "learning_rate": 1.3230769230769231e-05,
29
+ "loss": 0.3984,
30
  "step": 24
31
  },
32
  {
33
+ "epoch": 0.42,
34
+ "eval_accuracy": 0.8166666666666667,
35
+ "eval_f1_score": 0.8080459770114942,
36
+ "eval_gmean": 0.7501820727030913,
37
+ "eval_loss": 0.5816406011581421,
38
+ "eval_precision": 0.8133022774327122,
39
+ "eval_recall": 0.8166666666666667,
40
+ "eval_runtime": 175.4354,
41
+ "eval_samples_per_second": 0.342,
42
+ "eval_steps_per_second": 0.046,
43
  "step": 28
44
  },
45
  {
46
+ "epoch": 0.48,
47
+ "grad_norm": 6.625,
48
+ "learning_rate": 1.076923076923077e-05,
49
+ "loss": 0.3979,
50
  "step": 32
51
  },
52
  {
53
+ "epoch": 0.6,
54
+ "grad_norm": 7.875,
55
+ "learning_rate": 8.307692307692309e-06,
56
+ "loss": 0.3657,
57
  "step": 40
58
  },
59
  {
60
+ "epoch": 0.72,
61
+ "grad_norm": 10.0,
62
+ "learning_rate": 5.846153846153847e-06,
63
+ "loss": 0.38,
64
  "step": 48
65
  },
66
  {
67
+ "epoch": 0.84,
68
+ "grad_norm": 9.6875,
69
+ "learning_rate": 3.384615384615385e-06,
70
+ "loss": 0.4054,
71
  "step": 56
72
  },
73
  {
74
+ "epoch": 0.84,
75
+ "eval_accuracy": 0.8166666666666667,
76
+ "eval_f1_score": 0.8080459770114942,
77
+ "eval_gmean": 0.7501820727030913,
78
+ "eval_loss": 0.5850911736488342,
79
+ "eval_precision": 0.8133022774327122,
80
+ "eval_recall": 0.8166666666666667,
81
+ "eval_runtime": 173.49,
82
+ "eval_samples_per_second": 0.346,
83
+ "eval_steps_per_second": 0.046,
84
  "step": 56
85
  },
86
  {
87
+ "epoch": 0.96,
88
+ "grad_norm": 8.8125,
89
+ "learning_rate": 9.230769230769232e-07,
90
+ "loss": 0.3864,
91
  "step": 64
92
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  {
94
  "epoch": 1.0,
95
+ "step": 67,
96
+ "total_flos": 8.435821078904832e+16,
97
+ "train_loss": 0.405215135261194,
98
+ "train_runtime": 12957.5785,
99
+ "train_samples_per_second": 0.33,
100
+ "train_steps_per_second": 0.005
101
  }
102
  ],
103
  "logging_steps": 8,
104
+ "max_steps": 67,
105
  "num_input_tokens_seen": 0,
106
  "num_train_epochs": 1,
107
  "save_steps": 500,
108
+ "total_flos": 8.435821078904832e+16,
109
+ "train_batch_size": 16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }