danield12 committed on
Commit
2dd15fa
·
verified ·
1 Parent(s): c911bad

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247fc2b9ff6d183a31785d3f2a3b4287662c9c74c22d00915bfc7758ee61e6a1
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab078ca42c5033790a59a7d1b8ea5cb025038f8778b8bb368725c814de1dcb99
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.7666666666666667,
4
- "eval_f1_score": 0.7609087348217781,
5
- "eval_gmean": 0.7486572528549951,
6
- "eval_loss": 0.5165690183639526,
7
- "eval_precision": 0.7716666666666666,
8
- "eval_recall": 0.7666666666666667,
9
- "eval_runtime": 170.2423,
10
- "eval_samples_per_second": 0.352,
11
- "eval_steps_per_second": 0.047,
12
- "total_flos": 2.597706419798016e+16,
13
- "train_loss": 0.7354364809782609,
14
- "train_runtime": 6240.8638,
15
- "train_samples_per_second": 2.838,
16
- "train_steps_per_second": 0.022
17
  }
 
1
  {
2
+ "epoch": 1.95,
3
+ "eval_accuracy": 0.85,
4
+ "eval_f1_score": 0.8512605042016806,
5
+ "eval_gmean": 0.8238099238888024,
6
+ "eval_loss": 0.35761719942092896,
7
+ "eval_precision": 0.853042328042328,
8
+ "eval_recall": 0.85,
9
+ "eval_runtime": 183.8915,
10
+ "eval_samples_per_second": 0.326,
11
+ "eval_steps_per_second": 0.044,
12
+ "total_flos": 2.536276286767104e+16,
13
+ "train_loss": 2.198583984375,
14
+ "train_runtime": 3976.3696,
15
+ "train_samples_per_second": 0.323,
16
+ "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_accuracy": 0.7666666666666667,
4
- "eval_f1_score": 0.7609087348217781,
5
- "eval_gmean": 0.7486572528549951,
6
- "eval_loss": 0.5165690183639526,
7
- "eval_precision": 0.7716666666666666,
8
- "eval_recall": 0.7666666666666667,
9
- "eval_runtime": 170.2423,
10
- "eval_samples_per_second": 0.352,
11
- "eval_steps_per_second": 0.047
12
  }
 
1
  {
2
+ "epoch": 1.95,
3
+ "eval_accuracy": 0.85,
4
+ "eval_f1_score": 0.8512605042016806,
5
+ "eval_gmean": 0.8238099238888024,
6
+ "eval_loss": 0.35761719942092896,
7
+ "eval_precision": 0.853042328042328,
8
+ "eval_recall": 0.85,
9
+ "eval_runtime": 183.8915,
10
+ "eval_samples_per_second": 0.326,
11
+ "eval_steps_per_second": 0.044
12
  }
runs/Apr02_17-26-38_31946dd99e45/events.out.tfevents.1712078800.31946dd99e45.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96fa4882ae747c17ae6288324d4a105bd8f2ae5dd58f60c2079ff967f2398820
3
+ size 4955
runs/Apr02_17-29-10_31946dd99e45/events.out.tfevents.1712078950.31946dd99e45.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9970172f636f426cd33f169855d48cf2f4881c2a5897cb54c74bf0e5092d4a3
3
+ size 5717
runs/Apr02_17-29-10_31946dd99e45/events.out.tfevents.1712083110.31946dd99e45.34.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10bde09e68dd9cd3cb5b78de3ee14312b99d065290ec56a677e02b4f5ff93407
3
+ size 605
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "total_flos": 2.597706419798016e+16,
4
- "train_loss": 0.7354364809782609,
5
- "train_runtime": 6240.8638,
6
- "train_samples_per_second": 2.838,
7
- "train_steps_per_second": 0.022
8
  }
 
1
  {
2
+ "epoch": 1.95,
3
+ "total_flos": 2.536276286767104e+16,
4
+ "train_loss": 2.198583984375,
5
+ "train_runtime": 3976.3696,
6
+ "train_samples_per_second": 0.323,
7
+ "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -1,201 +1,44 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9963898916967509,
5
  "eval_steps": 28,
6
- "global_step": 138,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06,
13
- "grad_norm": 4.96875,
14
- "learning_rate": 1.911764705882353e-05,
15
- "loss": 0.7457,
16
  "step": 8
17
  },
18
  {
19
- "epoch": 0.12,
20
- "grad_norm": 12.75,
21
- "learning_rate": 1.7941176470588237e-05,
22
- "loss": 0.7997,
23
  "step": 16
24
  },
25
  {
26
- "epoch": 0.17,
27
- "grad_norm": 7.5625,
28
- "learning_rate": 1.6764705882352943e-05,
29
- "loss": 0.7616,
30
- "step": 24
31
- },
32
- {
33
- "epoch": 0.2,
34
- "eval_accuracy": 0.7666666666666667,
35
- "eval_f1_score": 0.7609087348217781,
36
- "eval_gmean": 0.7486572528549951,
37
- "eval_loss": 0.5174153447151184,
38
- "eval_precision": 0.7716666666666666,
39
- "eval_recall": 0.7666666666666667,
40
- "eval_runtime": 170.072,
41
- "eval_samples_per_second": 0.353,
42
- "eval_steps_per_second": 0.047,
43
- "step": 28
44
- },
45
- {
46
- "epoch": 0.23,
47
- "grad_norm": 18.0,
48
- "learning_rate": 1.558823529411765e-05,
49
- "loss": 0.7458,
50
- "step": 32
51
- },
52
- {
53
- "epoch": 0.29,
54
- "grad_norm": 7.78125,
55
- "learning_rate": 1.4411764705882353e-05,
56
- "loss": 0.691,
57
- "step": 40
58
- },
59
- {
60
- "epoch": 0.35,
61
- "grad_norm": 5.78125,
62
- "learning_rate": 1.323529411764706e-05,
63
- "loss": 0.7286,
64
- "step": 48
65
- },
66
- {
67
- "epoch": 0.4,
68
- "grad_norm": 6.40625,
69
- "learning_rate": 1.2058823529411765e-05,
70
- "loss": 0.8156,
71
- "step": 56
72
- },
73
- {
74
- "epoch": 0.4,
75
- "eval_accuracy": 0.7666666666666667,
76
- "eval_f1_score": 0.7609087348217781,
77
- "eval_gmean": 0.7486572528549951,
78
- "eval_loss": 0.5176106691360474,
79
- "eval_precision": 0.7716666666666666,
80
- "eval_recall": 0.7666666666666667,
81
- "eval_runtime": 170.1289,
82
- "eval_samples_per_second": 0.353,
83
- "eval_steps_per_second": 0.047,
84
- "step": 56
85
- },
86
- {
87
- "epoch": 0.46,
88
- "grad_norm": 10.3125,
89
- "learning_rate": 1.0882352941176471e-05,
90
- "loss": 0.7198,
91
- "step": 64
92
- },
93
- {
94
- "epoch": 0.52,
95
- "grad_norm": 9.0625,
96
- "learning_rate": 9.705882352941177e-06,
97
- "loss": 0.7421,
98
- "step": 72
99
- },
100
- {
101
- "epoch": 0.58,
102
- "grad_norm": 6.625,
103
- "learning_rate": 8.529411764705883e-06,
104
- "loss": 0.7488,
105
- "step": 80
106
- },
107
- {
108
- "epoch": 0.61,
109
- "eval_accuracy": 0.7666666666666667,
110
- "eval_f1_score": 0.7609087348217781,
111
- "eval_gmean": 0.7486572528549951,
112
- "eval_loss": 0.5164387822151184,
113
- "eval_precision": 0.7716666666666666,
114
- "eval_recall": 0.7666666666666667,
115
- "eval_runtime": 170.831,
116
- "eval_samples_per_second": 0.351,
117
- "eval_steps_per_second": 0.047,
118
- "step": 84
119
- },
120
- {
121
- "epoch": 0.64,
122
- "grad_norm": 6.125,
123
- "learning_rate": 7.352941176470589e-06,
124
- "loss": 0.7045,
125
- "step": 88
126
- },
127
- {
128
- "epoch": 0.69,
129
- "grad_norm": 4.625,
130
- "learning_rate": 6.176470588235295e-06,
131
- "loss": 0.6564,
132
- "step": 96
133
- },
134
- {
135
- "epoch": 0.75,
136
- "grad_norm": 8.3125,
137
- "learning_rate": 5e-06,
138
- "loss": 0.7003,
139
- "step": 104
140
- },
141
- {
142
- "epoch": 0.81,
143
- "grad_norm": 12.9375,
144
- "learning_rate": 3.8235294117647055e-06,
145
- "loss": 0.7528,
146
- "step": 112
147
- },
148
- {
149
- "epoch": 0.81,
150
- "eval_accuracy": 0.7666666666666667,
151
- "eval_f1_score": 0.7609087348217781,
152
- "eval_gmean": 0.7486572528549951,
153
- "eval_loss": 0.5166015625,
154
- "eval_precision": 0.7716666666666666,
155
- "eval_recall": 0.7666666666666667,
156
- "eval_runtime": 170.3596,
157
- "eval_samples_per_second": 0.352,
158
- "eval_steps_per_second": 0.047,
159
- "step": 112
160
- },
161
- {
162
- "epoch": 0.87,
163
- "grad_norm": 7.46875,
164
- "learning_rate": 2.647058823529412e-06,
165
- "loss": 0.7045,
166
- "step": 120
167
- },
168
- {
169
- "epoch": 0.92,
170
- "grad_norm": 5.8125,
171
- "learning_rate": 1.4705882352941177e-06,
172
- "loss": 0.7029,
173
- "step": 128
174
- },
175
- {
176
- "epoch": 0.98,
177
- "grad_norm": 10.1875,
178
- "learning_rate": 2.9411764705882356e-07,
179
- "loss": 0.7869,
180
- "step": 136
181
- },
182
- {
183
- "epoch": 1.0,
184
- "step": 138,
185
- "total_flos": 2.597706419798016e+16,
186
- "train_loss": 0.7354364809782609,
187
- "train_runtime": 6240.8638,
188
- "train_samples_per_second": 2.838,
189
- "train_steps_per_second": 0.022
190
  }
191
  ],
192
  "logging_steps": 8,
193
- "max_steps": 138,
194
  "num_input_tokens_seen": 0,
195
- "num_train_epochs": 1,
196
  "save_steps": 500,
197
- "total_flos": 2.597706419798016e+16,
198
- "train_batch_size": 32,
199
  "trial_name": null,
200
  "trial_params": null
201
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.951219512195122,
5
  "eval_steps": 28,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.78,
13
+ "grad_norm": 39.25,
14
+ "learning_rate": 1.3333333333333333e-05,
15
+ "loss": 2.3276,
16
  "step": 8
17
  },
18
  {
19
+ "epoch": 1.56,
20
+ "grad_norm": 25.75,
21
+ "learning_rate": 4.444444444444444e-06,
22
+ "loss": 1.9921,
23
  "step": 16
24
  },
25
  {
26
+ "epoch": 1.95,
27
+ "step": 20,
28
+ "total_flos": 2.536276286767104e+16,
29
+ "train_loss": 2.198583984375,
30
+ "train_runtime": 3976.3696,
31
+ "train_samples_per_second": 0.323,
32
+ "train_steps_per_second": 0.005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ],
35
  "logging_steps": 8,
36
+ "max_steps": 20,
37
  "num_input_tokens_seen": 0,
38
+ "num_train_epochs": 2,
39
  "save_steps": 500,
40
+ "total_flos": 2.536276286767104e+16,
41
+ "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": null
44
  }