Tippawan committed on
Commit
9e3b0da
·
verified ·
1 Parent(s): 86423ba

End of training

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +218 -0
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # tinyllama-codeHtml3
20
 
21
- This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
22
 
23
  ## Model description
24
 
 
18
 
19
  # tinyllama-codeHtml3
20
 
21
+ This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the colors dataset.
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.4721435316336166,
3
+ "total_flos": 1.875195005553869e+16,
4
+ "train_loss": 0.12820044946670534,
5
+ "train_runtime": 1672.7519,
6
+ "train_samples_per_second": 9.565,
7
+ "train_steps_per_second": 0.149
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.4721435316336166,
3
+ "total_flos": 1.875195005553869e+16,
4
+ "train_loss": 0.12820044946670534,
5
+ "train_runtime": 1672.7519,
6
+ "train_samples_per_second": 9.565,
7
+ "train_steps_per_second": 0.149
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.4721435316336166,
6
+ "eval_steps": 500,
7
+ "global_step": 250,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.018885741265344664,
14
+ "grad_norm": 0.4920569062232971,
15
+ "learning_rate": 0.00019936113105200085,
16
+ "loss": 0.7296,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.03777148253068933,
21
+ "grad_norm": 0.16350923478603363,
22
+ "learning_rate": 0.0001971631732914674,
23
+ "loss": 0.1418,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.056657223796033995,
28
+ "grad_norm": 0.137547567486763,
29
+ "learning_rate": 0.00019343289424566122,
30
+ "loss": 0.1158,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.07554296506137866,
35
+ "grad_norm": 0.1265052706003189,
36
+ "learning_rate": 0.00018822912264349534,
37
+ "loss": 0.1129,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.09442870632672333,
42
+ "grad_norm": 0.12020522356033325,
43
+ "learning_rate": 0.00018163392507171842,
44
+ "loss": 0.109,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.11331444759206799,
49
+ "grad_norm": 0.19054941833019257,
50
+ "learning_rate": 0.0001737513117358174,
51
+ "loss": 0.1082,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.13220018885741266,
56
+ "grad_norm": 0.10867941379547119,
57
+ "learning_rate": 0.00016470559615694446,
58
+ "loss": 0.1073,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.1510859301227573,
63
+ "grad_norm": 0.13128109276294708,
64
+ "learning_rate": 0.00015463943467342693,
65
+ "loss": 0.1036,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.16997167138810199,
70
+ "grad_norm": 0.09215914458036423,
71
+ "learning_rate": 0.0001437115766650933,
72
+ "loss": 0.0999,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.18885741265344666,
77
+ "grad_norm": 0.15978454053401947,
78
+ "learning_rate": 0.00013209436098072095,
79
+ "loss": 0.1009,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.2077431539187913,
84
+ "grad_norm": 0.11780431866645813,
85
+ "learning_rate": 0.00011997099805144069,
86
+ "loss": 0.0996,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.22662889518413598,
91
+ "grad_norm": 0.1909545511007309,
92
+ "learning_rate": 0.00010753268055279329,
93
+ "loss": 0.1015,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.24551463644948066,
98
+ "grad_norm": 0.12635937333106995,
99
+ "learning_rate": 9.497556818202306e-05,
100
+ "loss": 0.1023,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.26440037771482533,
105
+ "grad_norm": 0.12850093841552734,
106
+ "learning_rate": 8.249769410247239e-05,
107
+ "loss": 0.1005,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.28328611898017,
112
+ "grad_norm": 0.10055939108133316,
113
+ "learning_rate": 7.029584184229653e-05,
114
+ "loss": 0.0958,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.3021718602455146,
119
+ "grad_norm": 0.13055960834026337,
120
+ "learning_rate": 5.856244190067159e-05,
121
+ "loss": 0.0972,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.3210576015108593,
126
+ "grad_norm": 0.11343678832054138,
127
+ "learning_rate": 4.748253700387042e-05,
128
+ "loss": 0.0977,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.33994334277620397,
133
+ "grad_norm": 0.12275710701942444,
134
+ "learning_rate": 3.7230863870929964e-05,
135
+ "loss": 0.0975,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.3588290840415486,
140
+ "grad_norm": 0.12053252756595612,
141
+ "learning_rate": 2.7969097511209308e-05,
142
+ "loss": 0.0975,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.3777148253068933,
147
+ "grad_norm": 0.0943133682012558,
148
+ "learning_rate": 1.9843301512912327e-05,
149
+ "loss": 0.0968,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.39660056657223797,
154
+ "grad_norm": 0.10614251345396042,
155
+ "learning_rate": 1.2981624533047432e-05,
156
+ "loss": 0.0974,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.4154863078375826,
161
+ "grad_norm": 0.11574462801218033,
162
+ "learning_rate": 7.492279316554207e-06,
163
+ "loss": 0.0979,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.4343720491029273,
168
+ "grad_norm": 0.12150143086910248,
169
+ "learning_rate": 3.461836116672612e-06,
170
+ "loss": 0.0998,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.45325779036827196,
175
+ "grad_norm": 0.11058085411787033,
176
+ "learning_rate": 9.538574303348813e-07,
177
+ "loss": 0.096,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.4721435316336166,
182
+ "grad_norm": 0.11206520348787308,
183
+ "learning_rate": 7.895579618388827e-09,
184
+ "loss": 0.0987,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.4721435316336166,
189
+ "step": 250,
190
+ "total_flos": 1.875195005553869e+16,
191
+ "train_loss": 0.12820044946670534,
192
+ "train_runtime": 1672.7519,
193
+ "train_samples_per_second": 9.565,
194
+ "train_steps_per_second": 0.149
195
+ }
196
+ ],
197
+ "logging_steps": 10,
198
+ "max_steps": 250,
199
+ "num_input_tokens_seen": 0,
200
+ "num_train_epochs": 1,
201
+ "save_steps": 500,
202
+ "stateful_callbacks": {
203
+ "TrainerControl": {
204
+ "args": {
205
+ "should_epoch_stop": false,
206
+ "should_evaluate": false,
207
+ "should_log": false,
208
+ "should_save": true,
209
+ "should_training_stop": true
210
+ },
211
+ "attributes": {}
212
+ }
213
+ },
214
+ "total_flos": 1.875195005553869e+16,
215
+ "train_batch_size": 16,
216
+ "trial_name": null,
217
+ "trial_params": null
218
+ }