Raiff1982 committed on
Commit
ead76da
·
verified ·
1 Parent(s): cd19216

Upload folder using huggingface_hub

Browse files
consciousness/adapter_config.json CHANGED
@@ -29,10 +29,10 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "k_proj",
33
  "q_proj",
34
- "v_proj",
35
- "o_proj"
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "o_proj",
33
  "k_proj",
34
  "q_proj",
35
+ "v_proj"
 
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
consciousness/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f33b94aa241bc5e536c9835963b5b1d0cb8d7d1055f12d2f0c6b4a716d1cb3
3
  size 27297544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2633a26cf09e988f2b32a3ccdd93ae0821547a5b3864cb1e7ad13a8c7eab44e9
3
  size 27297544
consciousness/checkpoint-225/adapter_config.json CHANGED
@@ -29,10 +29,10 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
32
  "k_proj",
33
  "q_proj",
34
- "v_proj",
35
- "o_proj"
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "o_proj",
33
  "k_proj",
34
  "q_proj",
35
+ "v_proj"
 
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
consciousness/checkpoint-225/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f33b94aa241bc5e536c9835963b5b1d0cb8d7d1055f12d2f0c6b4a716d1cb3
3
  size 27297544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2633a26cf09e988f2b32a3ccdd93ae0821547a5b3864cb1e7ad13a8c7eab44e9
3
  size 27297544
consciousness/checkpoint-225/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee9eb5e6685805d3c665ee3cc29d44f68eefb0790059f256e7aa2fbfacc201e7
3
  size 54745547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd2af8e2d1a09808732f7b3860421a5364a4d1ea1d0c8402f9d55b86ab184c5
3
  size 54745547
consciousness/checkpoint-225/trainer_state.json CHANGED
@@ -10,222 +10,222 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 2.7761129081249236,
14
  "epoch": 0.13333333333333333,
15
- "grad_norm": 0.875,
16
  "learning_rate": 9.908256880733946e-05,
17
- "loss": 3.244321823120117,
18
- "mean_token_accuracy": 0.44045700430870055,
19
  "num_tokens": 24761.0,
20
  "step": 10
21
  },
22
  {
23
- "entropy": 2.040079203248024,
24
  "epoch": 0.26666666666666666,
25
- "grad_norm": 0.82421875,
26
  "learning_rate": 9.44954128440367e-05,
27
- "loss": 1.9786895751953124,
28
- "mean_token_accuracy": 0.5847875744104385,
29
  "num_tokens": 49610.0,
30
  "step": 20
31
  },
32
  {
33
- "entropy": 0.7117585293948651,
34
  "epoch": 0.4,
35
- "grad_norm": 0.3359375,
36
  "learning_rate": 8.990825688073395e-05,
37
- "loss": 0.6083943367004394,
38
- "mean_token_accuracy": 0.8619846701622009,
39
  "num_tokens": 74374.0,
40
  "step": 30
41
  },
42
  {
43
- "entropy": 0.2695525992661715,
44
  "epoch": 0.5333333333333333,
45
- "grad_norm": 0.2021484375,
46
  "learning_rate": 8.53211009174312e-05,
47
- "loss": 0.2516770601272583,
48
- "mean_token_accuracy": 0.9505109563469887,
49
  "num_tokens": 99237.0,
50
  "step": 40
51
  },
52
  {
53
- "entropy": 0.19716580733656883,
54
  "epoch": 0.6666666666666666,
55
- "grad_norm": 0.181640625,
56
  "learning_rate": 8.073394495412844e-05,
57
- "loss": 0.1818714141845703,
58
- "mean_token_accuracy": 0.9645047709345818,
59
  "num_tokens": 124034.0,
60
  "step": 50
61
  },
62
  {
63
- "entropy": 0.17025253251194955,
64
  "epoch": 0.8,
65
- "grad_norm": 0.1328125,
66
  "learning_rate": 7.614678899082569e-05,
67
- "loss": 0.1523423671722412,
68
- "mean_token_accuracy": 0.9701748862862587,
69
  "num_tokens": 148853.0,
70
  "step": 60
71
  },
72
  {
73
- "entropy": 0.14797840677201748,
74
  "epoch": 0.9333333333333333,
75
- "grad_norm": 0.1962890625,
76
  "learning_rate": 7.155963302752295e-05,
77
- "loss": 0.13764077425003052,
78
- "mean_token_accuracy": 0.9720944717526436,
79
  "num_tokens": 173595.0,
80
  "step": 70
81
  },
82
  {
83
- "entropy": 0.138279221765697,
84
  "epoch": 1.0666666666666667,
85
- "grad_norm": 0.1357421875,
86
  "learning_rate": 6.697247706422018e-05,
87
- "loss": 0.12346233129501342,
88
- "mean_token_accuracy": 0.9741749912500381,
89
  "num_tokens": 198381.0,
90
  "step": 80
91
  },
92
  {
93
- "entropy": 0.1283793417736888,
94
  "epoch": 1.2,
95
- "grad_norm": 0.11376953125,
96
  "learning_rate": 6.238532110091744e-05,
97
- "loss": 0.11164928674697876,
98
- "mean_token_accuracy": 0.9769473403692246,
99
  "num_tokens": 223136.0,
100
  "step": 90
101
  },
102
  {
103
- "entropy": 0.12412137631326914,
104
  "epoch": 1.3333333333333333,
105
- "grad_norm": 0.1162109375,
106
  "learning_rate": 5.779816513761468e-05,
107
- "loss": 0.10832087993621826,
108
- "mean_token_accuracy": 0.9769640281796456,
109
  "num_tokens": 247997.0,
110
  "step": 100
111
  },
112
  {
113
- "entropy": 0.12210103627294303,
114
  "epoch": 1.4666666666666668,
115
- "grad_norm": 0.13671875,
116
  "learning_rate": 5.3211009174311934e-05,
117
- "loss": 0.10235855579376221,
118
- "mean_token_accuracy": 0.9771915912628174,
119
  "num_tokens": 272852.0,
120
  "step": 110
121
  },
122
  {
123
- "entropy": 0.11977386996150016,
124
  "epoch": 1.6,
125
- "grad_norm": 0.10009765625,
126
  "learning_rate": 4.862385321100918e-05,
127
- "loss": 0.09748664498329163,
128
- "mean_token_accuracy": 0.9808539599180222,
129
  "num_tokens": 297579.0,
130
  "step": 120
131
  },
132
  {
133
- "entropy": 0.11996886190026998,
134
  "epoch": 1.7333333333333334,
135
- "grad_norm": 0.1669921875,
136
  "learning_rate": 4.403669724770643e-05,
137
- "loss": 0.09674965739250183,
138
- "mean_token_accuracy": 0.9802620068192482,
139
  "num_tokens": 322381.0,
140
  "step": 130
141
  },
142
  {
143
- "entropy": 0.11315569579601288,
144
  "epoch": 1.8666666666666667,
145
- "grad_norm": 0.10400390625,
146
  "learning_rate": 3.944954128440367e-05,
147
- "loss": 0.09112051725387574,
148
- "mean_token_accuracy": 0.9821677714586258,
149
  "num_tokens": 347147.0,
150
  "step": 140
151
  },
152
  {
153
- "entropy": 0.11618176773190499,
154
  "epoch": 2.0,
155
- "grad_norm": 0.1748046875,
156
  "learning_rate": 3.486238532110092e-05,
157
- "loss": 0.08905571103096008,
158
- "mean_token_accuracy": 0.9811127334833145,
159
  "num_tokens": 372010.0,
160
  "step": 150
161
  },
162
  {
163
- "entropy": 0.11125754974782467,
164
  "epoch": 2.1333333333333333,
165
- "grad_norm": 0.1025390625,
166
  "learning_rate": 3.027522935779817e-05,
167
- "loss": 0.0815092146396637,
168
- "mean_token_accuracy": 0.9828289076685905,
169
  "num_tokens": 396827.0,
170
  "step": 160
171
  },
172
  {
173
- "entropy": 0.09863599725067615,
174
  "epoch": 2.2666666666666666,
175
- "grad_norm": 0.1181640625,
176
  "learning_rate": 2.5688073394495416e-05,
177
- "loss": 0.07570468187332154,
178
- "mean_token_accuracy": 0.9811465948820114,
179
  "num_tokens": 421673.0,
180
  "step": 170
181
  },
182
  {
183
- "entropy": 0.08374876081943512,
184
  "epoch": 2.4,
185
- "grad_norm": 0.146484375,
186
  "learning_rate": 2.1100917431192662e-05,
187
- "loss": 0.06952499151229859,
188
- "mean_token_accuracy": 0.9823125705122948,
189
  "num_tokens": 446476.0,
190
  "step": 180
191
  },
192
  {
193
- "entropy": 0.0826102739199996,
194
  "epoch": 2.533333333333333,
195
- "grad_norm": 0.126953125,
196
  "learning_rate": 1.651376146788991e-05,
197
- "loss": 0.067434161901474,
198
- "mean_token_accuracy": 0.9819168344140052,
199
  "num_tokens": 471250.0,
200
  "step": 190
201
  },
202
  {
203
- "entropy": 0.08154539205133915,
204
  "epoch": 2.6666666666666665,
205
- "grad_norm": 0.1318359375,
206
  "learning_rate": 1.1926605504587156e-05,
207
- "loss": 0.066810941696167,
208
- "mean_token_accuracy": 0.9812787815928459,
209
  "num_tokens": 496006.0,
210
  "step": 200
211
  },
212
  {
213
- "entropy": 0.0844537828117609,
214
  "epoch": 2.8,
215
- "grad_norm": 0.11328125,
216
  "learning_rate": 7.3394495412844045e-06,
217
- "loss": 0.06481906175613403,
218
- "mean_token_accuracy": 0.9825038447976112,
219
  "num_tokens": 520845.0,
220
  "step": 210
221
  },
222
  {
223
- "entropy": 0.0813144288957119,
224
  "epoch": 2.9333333333333336,
225
- "grad_norm": 0.09375,
226
  "learning_rate": 2.7522935779816517e-06,
227
- "loss": 0.0626812994480133,
228
- "mean_token_accuracy": 0.9836445167660713,
229
  "num_tokens": 545626.0,
230
  "step": 220
231
  }
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.7765824437141418,
14
  "epoch": 0.13333333333333333,
15
+ "grad_norm": 0.80078125,
16
  "learning_rate": 9.908256880733946e-05,
17
+ "loss": 3.2429569244384764,
18
+ "mean_token_accuracy": 0.44057580903172494,
19
  "num_tokens": 24761.0,
20
  "step": 10
21
  },
22
  {
23
+ "entropy": 2.0341117173433303,
24
  "epoch": 0.26666666666666666,
25
+ "grad_norm": 0.8203125,
26
  "learning_rate": 9.44954128440367e-05,
27
+ "loss": 1.963392448425293,
28
+ "mean_token_accuracy": 0.5877765864133835,
29
  "num_tokens": 49610.0,
30
  "step": 20
31
  },
32
  {
33
+ "entropy": 0.7002565786242485,
34
  "epoch": 0.4,
35
+ "grad_norm": 0.34765625,
36
  "learning_rate": 8.990825688073395e-05,
37
+ "loss": 0.5959352016448974,
38
+ "mean_token_accuracy": 0.8655647665262223,
39
  "num_tokens": 74374.0,
40
  "step": 30
41
  },
42
  {
43
+ "entropy": 0.26718092486262324,
44
  "epoch": 0.5333333333333333,
45
+ "grad_norm": 0.2216796875,
46
  "learning_rate": 8.53211009174312e-05,
47
+ "loss": 0.24935545921325683,
48
+ "mean_token_accuracy": 0.9515595227479935,
49
  "num_tokens": 99237.0,
50
  "step": 40
51
  },
52
  {
53
+ "entropy": 0.19680907018482685,
54
  "epoch": 0.6666666666666666,
55
+ "grad_norm": 0.1884765625,
56
  "learning_rate": 8.073394495412844e-05,
57
+ "loss": 0.18131427764892577,
58
+ "mean_token_accuracy": 0.9645171865820885,
59
  "num_tokens": 124034.0,
60
  "step": 50
61
  },
62
  {
63
+ "entropy": 0.17123022936284543,
64
  "epoch": 0.8,
65
+ "grad_norm": 0.1318359375,
66
  "learning_rate": 7.614678899082569e-05,
67
+ "loss": 0.15188711881637573,
68
+ "mean_token_accuracy": 0.9693311020731926,
69
  "num_tokens": 148853.0,
70
  "step": 60
71
  },
72
  {
73
+ "entropy": 0.15020480044186116,
74
  "epoch": 0.9333333333333333,
75
+ "grad_norm": 0.271484375,
76
  "learning_rate": 7.155963302752295e-05,
77
+ "loss": 0.13665119409561158,
78
+ "mean_token_accuracy": 0.9722599163651466,
79
  "num_tokens": 173595.0,
80
  "step": 70
81
  },
82
  {
83
+ "entropy": 0.14392711482942105,
84
  "epoch": 1.0666666666666667,
85
+ "grad_norm": 0.1533203125,
86
  "learning_rate": 6.697247706422018e-05,
87
+ "loss": 0.12209154367446899,
88
+ "mean_token_accuracy": 0.9730806604027749,
89
  "num_tokens": 198381.0,
90
  "step": 80
91
  },
92
  {
93
+ "entropy": 0.12987205907702445,
94
  "epoch": 1.2,
95
+ "grad_norm": 0.1318359375,
96
  "learning_rate": 6.238532110091744e-05,
97
+ "loss": 0.107689368724823,
98
+ "mean_token_accuracy": 0.9769851833581924,
99
  "num_tokens": 223136.0,
100
  "step": 90
101
  },
102
  {
103
+ "entropy": 0.12836090996861457,
104
  "epoch": 1.3333333333333333,
105
+ "grad_norm": 0.1240234375,
106
  "learning_rate": 5.779816513761468e-05,
107
+ "loss": 0.10651180744171143,
108
+ "mean_token_accuracy": 0.9795817092061043,
109
  "num_tokens": 247997.0,
110
  "step": 100
111
  },
112
  {
113
+ "entropy": 0.12321772910654545,
114
  "epoch": 1.4666666666666668,
115
+ "grad_norm": 0.150390625,
116
  "learning_rate": 5.3211009174311934e-05,
117
+ "loss": 0.10062012672424317,
118
+ "mean_token_accuracy": 0.980827870965004,
119
  "num_tokens": 272852.0,
120
  "step": 110
121
  },
122
  {
123
+ "entropy": 0.11977940555661917,
124
  "epoch": 1.6,
125
+ "grad_norm": 0.1103515625,
126
  "learning_rate": 4.862385321100918e-05,
127
+ "loss": 0.09529207944869995,
128
+ "mean_token_accuracy": 0.9806077152490615,
129
  "num_tokens": 297579.0,
130
  "step": 120
131
  },
132
  {
133
+ "entropy": 0.1198594804853201,
134
  "epoch": 1.7333333333333334,
135
+ "grad_norm": 0.1708984375,
136
  "learning_rate": 4.403669724770643e-05,
137
+ "loss": 0.09222554564476013,
138
+ "mean_token_accuracy": 0.9803455516695976,
139
  "num_tokens": 322381.0,
140
  "step": 130
141
  },
142
  {
143
+ "entropy": 0.10655448362231254,
144
  "epoch": 1.8666666666666667,
145
+ "grad_norm": 0.1142578125,
146
  "learning_rate": 3.944954128440367e-05,
147
+ "loss": 0.08148675560951232,
148
+ "mean_token_accuracy": 0.9823274478316307,
149
  "num_tokens": 347147.0,
150
  "step": 140
151
  },
152
  {
153
+ "entropy": 0.09842615202069283,
154
  "epoch": 2.0,
155
+ "grad_norm": 0.2109375,
156
  "learning_rate": 3.486238532110092e-05,
157
+ "loss": 0.07490686774253845,
158
+ "mean_token_accuracy": 0.9813576474785805,
159
  "num_tokens": 372010.0,
160
  "step": 150
161
  },
162
  {
163
+ "entropy": 0.08633840866386891,
164
  "epoch": 2.1333333333333333,
165
+ "grad_norm": 0.10595703125,
166
  "learning_rate": 3.027522935779817e-05,
167
+ "loss": 0.06638463735580444,
168
+ "mean_token_accuracy": 0.9825427159667015,
169
  "num_tokens": 396827.0,
170
  "step": 160
171
  },
172
  {
173
+ "entropy": 0.0847671527415514,
174
  "epoch": 2.2666666666666666,
175
+ "grad_norm": 0.1142578125,
176
  "learning_rate": 2.5688073394495416e-05,
177
+ "loss": 0.0631272852420807,
178
+ "mean_token_accuracy": 0.9811472788453102,
179
  "num_tokens": 421673.0,
180
  "step": 170
181
  },
182
  {
183
+ "entropy": 0.08435465320944786,
184
  "epoch": 2.4,
185
+ "grad_norm": 0.1455078125,
186
  "learning_rate": 2.1100917431192662e-05,
187
+ "loss": 0.05959140658378601,
188
+ "mean_token_accuracy": 0.9823448762297631,
189
  "num_tokens": 446476.0,
190
  "step": 180
191
  },
192
  {
193
+ "entropy": 0.07953801900148391,
194
  "epoch": 2.533333333333333,
195
+ "grad_norm": 0.1328125,
196
  "learning_rate": 1.651376146788991e-05,
197
+ "loss": 0.05914499163627625,
198
+ "mean_token_accuracy": 0.9821029722690582,
199
  "num_tokens": 471250.0,
200
  "step": 190
201
  },
202
  {
203
+ "entropy": 0.08058121707290411,
204
  "epoch": 2.6666666666666665,
205
+ "grad_norm": 0.14453125,
206
  "learning_rate": 1.1926605504587156e-05,
207
+ "loss": 0.05882708430290222,
208
+ "mean_token_accuracy": 0.9816043302416801,
209
  "num_tokens": 496006.0,
210
  "step": 200
211
  },
212
  {
213
+ "entropy": 0.08485903479158878,
214
  "epoch": 2.8,
215
+ "grad_norm": 0.1220703125,
216
  "learning_rate": 7.3394495412844045e-06,
217
+ "loss": 0.05778748989105224,
218
+ "mean_token_accuracy": 0.9820649787783623,
219
  "num_tokens": 520845.0,
220
  "step": 210
221
  },
222
  {
223
+ "entropy": 0.08102625366300345,
224
  "epoch": 2.9333333333333336,
225
+ "grad_norm": 0.1123046875,
226
  "learning_rate": 2.7522935779816517e-06,
227
+ "loss": 0.05605600476264953,
228
+ "mean_token_accuracy": 0.9830348506569863,
229
  "num_tokens": 545626.0,
230
  "step": 220
231
  }