chansung commited on
Commit
addf27d
·
verified ·
1 Parent(s): 65563da

Model save

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 2.1894
24
 
25
  ## Model description
26
 
@@ -57,7 +57,7 @@ The following hyperparameters were used during training:
57
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:------:|:----:|:---------------:|
60
- | 1.8961 | 0.9959 | 121 | 2.1894 |
61
 
62
 
63
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 2.1987
24
 
25
  ## Model description
26
 
 
57
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:------:|:----:|:---------------:|
60
+ | 1.8855 | 0.9959 | 121 | 2.1987 |
61
 
62
 
63
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc68ea73bc478267e60d5083b556ef01a97e0bfb8219019b1b18416c29d76668
3
  size 12859872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d66f64061d19a913cabcf5244ff5cbbc23e6611a32afaf36bbce74e85ed33c2
3
  size 12859872
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
  "epoch": 0.9958847736625515,
3
- "eval_loss": 2.1894350051879883,
4
- "eval_runtime": 1.2571,
5
- "eval_samples": 16,
6
- "eval_samples_per_second": 3.182,
7
- "eval_steps_per_second": 0.796,
8
  "total_flos": 3.689505230149386e+17,
9
- "train_loss": 10.321047170103089,
10
- "train_runtime": 1495.3969,
11
  "train_samples": 92634,
12
- "train_samples_per_second": 10.358,
13
- "train_steps_per_second": 0.081
14
  }
 
1
  {
2
  "epoch": 0.9958847736625515,
 
 
 
 
 
3
  "total_flos": 3.689505230149386e+17,
4
+ "train_loss": 10.36233426716702,
5
+ "train_runtime": 314.1127,
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 49.314,
8
+ "train_steps_per_second": 0.385
9
  }
runs/Nov18_00-42-14_main-milora-gemma7b-classification-0-0/events.out.tfevents.1731909175.main-milora-gemma7b-classification-0-0.457.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0aa5995d0b07fac1bb815a675474fba6b285074d1f085be88d575a8e4a9c0737
3
- size 10064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40f6721b5932efd5149677a8265bb6d7898ef48be3ef72efad95929d6531db9
3
+ size 11506
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9958847736625515,
3
  "total_flos": 3.689505230149386e+17,
4
- "train_loss": 10.321047170103089,
5
- "train_runtime": 1495.3969,
6
  "train_samples": 92634,
7
- "train_samples_per_second": 10.358,
8
- "train_steps_per_second": 0.081
9
  }
 
1
  {
2
  "epoch": 0.9958847736625515,
3
  "total_flos": 3.689505230149386e+17,
4
+ "train_loss": 10.36233426716702,
5
+ "train_runtime": 314.1127,
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 49.314,
8
+ "train_steps_per_second": 0.385
9
  }
trainer_state.json CHANGED
@@ -10,195 +10,195 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.00823045267489712,
13
- "grad_norm": 161.76547241210938,
14
  "learning_rate": 1.5384615384615387e-05,
15
- "loss": 49.1053,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0411522633744856,
20
- "grad_norm": 98.96170806884766,
21
  "learning_rate": 7.692307692307693e-05,
22
- "loss": 45.9684,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.0823045267489712,
27
- "grad_norm": 18.99808120727539,
28
  "learning_rate": 0.00015384615384615385,
29
- "loss": 34.4524,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.12345679012345678,
34
- "grad_norm": 18.723434448242188,
35
  "learning_rate": 0.00019983081582712685,
36
- "loss": 27.5705,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.1646090534979424,
41
- "grad_norm": 5.907467365264893,
42
  "learning_rate": 0.00019793406217655517,
43
- "loss": 24.8864,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.205761316872428,
48
- "grad_norm": 9.208550453186035,
49
  "learning_rate": 0.00019396926207859084,
50
- "loss": 23.6273,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.24691358024691357,
55
- "grad_norm": 18.81309700012207,
56
  "learning_rate": 0.00018802013911801112,
57
- "loss": 21.6619,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.2880658436213992,
62
- "grad_norm": 30.242107391357422,
63
  "learning_rate": 0.0001802123192755044,
64
- "loss": 17.8913,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.3292181069958848,
69
- "grad_norm": 39.21098327636719,
70
  "learning_rate": 0.00017071067811865476,
71
- "loss": 12.4227,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.37037037037037035,
76
- "grad_norm": 18.91362762451172,
77
  "learning_rate": 0.00015971585917027862,
78
- "loss": 6.2936,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.411522633744856,
83
- "grad_norm": 12.601763725280762,
84
  "learning_rate": 0.00014746003697476404,
85
- "loss": 3.8101,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.45267489711934156,
90
- "grad_norm": 7.025509357452393,
91
  "learning_rate": 0.00013420201433256689,
92
- "loss": 3.2225,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 0.49382716049382713,
97
- "grad_norm": 2.7400238513946533,
98
  "learning_rate": 0.00012022175723320381,
99
- "loss": 2.7407,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.5349794238683128,
104
- "grad_norm": 2.330040454864502,
105
  "learning_rate": 0.00010581448289104758,
106
- "loss": 2.4184,
107
  "step": 65
108
  },
109
  {
110
  "epoch": 0.5761316872427984,
111
- "grad_norm": 1.8120249509811401,
112
  "learning_rate": 9.128442572523417e-05,
113
- "loss": 2.2454,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.6172839506172839,
118
- "grad_norm": 2.3081297874450684,
119
  "learning_rate": 7.693841292575598e-05,
120
- "loss": 2.1335,
121
  "step": 75
122
  },
123
  {
124
  "epoch": 0.6584362139917695,
125
- "grad_norm": 0.960925281047821,
126
  "learning_rate": 6.307938526873157e-05,
127
- "loss": 2.0547,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.6995884773662552,
132
- "grad_norm": 0.8918873071670532,
133
  "learning_rate": 5.000000000000002e-05,
134
- "loss": 1.9972,
135
  "step": 85
136
  },
137
  {
138
  "epoch": 0.7407407407407407,
139
- "grad_norm": 0.7359836101531982,
140
  "learning_rate": 3.7976450873174005e-05,
141
- "loss": 1.9597,
142
  "step": 90
143
  },
144
  {
145
  "epoch": 0.7818930041152263,
146
- "grad_norm": 1.2649565935134888,
147
  "learning_rate": 2.7262635842695127e-05,
148
- "loss": 1.9328,
149
  "step": 95
150
  },
151
  {
152
  "epoch": 0.823045267489712,
153
- "grad_norm": 1.0137094259262085,
154
  "learning_rate": 1.808479557110081e-05,
155
- "loss": 1.9037,
156
  "step": 100
157
  },
158
  {
159
  "epoch": 0.8641975308641975,
160
- "grad_norm": 1.392273187637329,
161
  "learning_rate": 1.0636735967658784e-05,
162
- "loss": 1.9017,
163
  "step": 105
164
  },
165
  {
166
  "epoch": 0.9053497942386831,
167
- "grad_norm": 0.9710758924484253,
168
  "learning_rate": 5.075735642696611e-06,
169
- "loss": 1.9008,
170
  "step": 110
171
  },
172
  {
173
  "epoch": 0.9465020576131687,
174
- "grad_norm": 1.074477195739746,
175
  "learning_rate": 1.5192246987791981e-06,
176
- "loss": 1.8851,
177
  "step": 115
178
  },
179
  {
180
  "epoch": 0.9876543209876543,
181
- "grad_norm": 0.776634693145752,
182
  "learning_rate": 4.230499177994007e-08,
183
- "loss": 1.8961,
184
  "step": 120
185
  },
186
  {
187
  "epoch": 0.9958847736625515,
188
- "eval_loss": 2.1894350051879883,
189
- "eval_runtime": 1.256,
190
- "eval_samples_per_second": 3.185,
191
- "eval_steps_per_second": 0.796,
192
  "step": 121
193
  },
194
  {
195
  "epoch": 0.9958847736625515,
196
  "step": 121,
197
  "total_flos": 3.689505230149386e+17,
198
- "train_loss": 10.321047170103089,
199
- "train_runtime": 1495.3969,
200
- "train_samples_per_second": 10.358,
201
- "train_steps_per_second": 0.081
202
  }
203
  ],
204
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.00823045267489712,
13
+ "grad_norm": 157.12161254882812,
14
  "learning_rate": 1.5384615384615387e-05,
15
+ "loss": 48.0544,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0411522633744856,
20
+ "grad_norm": 110.75520324707031,
21
  "learning_rate": 7.692307692307693e-05,
22
+ "loss": 46.1352,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.0823045267489712,
27
+ "grad_norm": 19.529361724853516,
28
  "learning_rate": 0.00015384615384615385,
29
+ "loss": 34.622,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.12345679012345678,
34
+ "grad_norm": 19.890928268432617,
35
  "learning_rate": 0.00019983081582712685,
36
+ "loss": 27.6403,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.1646090534979424,
41
+ "grad_norm": 6.077105522155762,
42
  "learning_rate": 0.00019793406217655517,
43
+ "loss": 24.9282,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.205761316872428,
48
+ "grad_norm": 9.045083999633789,
49
  "learning_rate": 0.00019396926207859084,
50
+ "loss": 23.6636,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.24691358024691357,
55
+ "grad_norm": 18.40345573425293,
56
  "learning_rate": 0.00018802013911801112,
57
+ "loss": 21.7488,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.2880658436213992,
62
+ "grad_norm": 29.79853057861328,
63
  "learning_rate": 0.0001802123192755044,
64
+ "loss": 18.0863,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.3292181069958848,
69
+ "grad_norm": 39.49338912963867,
70
  "learning_rate": 0.00017071067811865476,
71
+ "loss": 12.8006,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.37037037037037035,
76
+ "grad_norm": 21.521495819091797,
77
  "learning_rate": 0.00015971585917027862,
78
+ "loss": 6.6678,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.411522633744856,
83
+ "grad_norm": 11.19473934173584,
84
  "learning_rate": 0.00014746003697476404,
85
+ "loss": 3.8272,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.45267489711934156,
90
+ "grad_norm": 6.990288734436035,
91
  "learning_rate": 0.00013420201433256689,
92
+ "loss": 3.1646,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 0.49382716049382713,
97
+ "grad_norm": 2.7517666816711426,
98
  "learning_rate": 0.00012022175723320381,
99
+ "loss": 2.6933,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.5349794238683128,
104
+ "grad_norm": 2.2007288932800293,
105
  "learning_rate": 0.00010581448289104758,
106
+ "loss": 2.3824,
107
  "step": 65
108
  },
109
  {
110
  "epoch": 0.5761316872427984,
111
+ "grad_norm": 2.1180613040924072,
112
  "learning_rate": 9.128442572523417e-05,
113
+ "loss": 2.2163,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.6172839506172839,
118
+ "grad_norm": 2.3379714488983154,
119
  "learning_rate": 7.693841292575598e-05,
120
+ "loss": 2.1149,
121
  "step": 75
122
  },
123
  {
124
  "epoch": 0.6584362139917695,
125
+ "grad_norm": 0.9881446957588196,
126
  "learning_rate": 6.307938526873157e-05,
127
+ "loss": 2.0377,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.6995884773662552,
132
+ "grad_norm": 0.8957713842391968,
133
  "learning_rate": 5.000000000000002e-05,
134
+ "loss": 1.985,
135
  "step": 85
136
  },
137
  {
138
  "epoch": 0.7407407407407407,
139
+ "grad_norm": 0.9342585206031799,
140
  "learning_rate": 3.7976450873174005e-05,
141
+ "loss": 1.949,
142
  "step": 90
143
  },
144
  {
145
  "epoch": 0.7818930041152263,
146
+ "grad_norm": 1.272150993347168,
147
  "learning_rate": 2.7262635842695127e-05,
148
+ "loss": 1.9218,
149
  "step": 95
150
  },
151
  {
152
  "epoch": 0.823045267489712,
153
+ "grad_norm": 1.0257776975631714,
154
  "learning_rate": 1.808479557110081e-05,
155
+ "loss": 1.894,
156
  "step": 100
157
  },
158
  {
159
  "epoch": 0.8641975308641975,
160
+ "grad_norm": 1.343111515045166,
161
  "learning_rate": 1.0636735967658784e-05,
162
+ "loss": 1.8922,
163
  "step": 105
164
  },
165
  {
166
  "epoch": 0.9053497942386831,
167
+ "grad_norm": 0.977741003036499,
168
  "learning_rate": 5.075735642696611e-06,
169
+ "loss": 1.8888,
170
  "step": 110
171
  },
172
  {
173
  "epoch": 0.9465020576131687,
174
+ "grad_norm": 0.9998582601547241,
175
  "learning_rate": 1.5192246987791981e-06,
176
+ "loss": 1.8767,
177
  "step": 115
178
  },
179
  {
180
  "epoch": 0.9876543209876543,
181
+ "grad_norm": 0.8014172315597534,
182
  "learning_rate": 4.230499177994007e-08,
183
+ "loss": 1.8855,
184
  "step": 120
185
  },
186
  {
187
  "epoch": 0.9958847736625515,
188
+ "eval_loss": 2.19870924949646,
189
+ "eval_runtime": 0.4577,
190
+ "eval_samples_per_second": 8.739,
191
+ "eval_steps_per_second": 2.185,
192
  "step": 121
193
  },
194
  {
195
  "epoch": 0.9958847736625515,
196
  "step": 121,
197
  "total_flos": 3.689505230149386e+17,
198
+ "train_loss": 10.36233426716702,
199
+ "train_runtime": 314.1127,
200
+ "train_samples_per_second": 49.314,
201
+ "train_steps_per_second": 0.385
202
  }
203
  ],
204
  "logging_steps": 5,