esfrankel17 committed on
Commit
d5e43ce
·
verified ·
1 Parent(s): 65d3f94

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ license: llama3.1
4
  base_model: meta-llama/Meta-Llama-3.1-8B
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: oh_v1-2_only_camel_math
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # oh_v1-2_only_camel_math
17
 
18
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.3989
21
 
 
4
  base_model: meta-llama/Meta-Llama-3.1-8B
5
  tags:
6
  - llama-factory
7
+ - full
8
  - generated_from_trainer
9
  model-index:
10
  - name: oh_v1-2_only_camel_math
 
16
 
17
  # oh_v1-2_only_camel_math
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1-2_only_camel_math dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.3989
22
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9575757575757575,
3
+ "eval_loss": 0.3988819718360901,
4
+ "eval_runtime": 45.9635,
5
+ "eval_samples_per_second": 36.268,
6
+ "eval_steps_per_second": 0.587,
7
+ "total_flos": 306322436259840.0,
8
+ "train_loss": 0.3892440470189996,
9
+ "train_runtime": 6166.0025,
10
+ "train_samples_per_second": 15.409,
11
+ "train_steps_per_second": 0.03
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9575757575757575,
3
+ "eval_loss": 0.3988819718360901,
4
+ "eval_runtime": 45.9635,
5
+ "eval_samples_per_second": 36.268,
6
+ "eval_steps_per_second": 0.587
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.9575757575757575,
3
+ "total_flos": 306322436259840.0,
4
+ "train_loss": 0.3892440470189996,
5
+ "train_runtime": 6166.0025,
6
+ "train_samples_per_second": 15.409,
7
+ "train_steps_per_second": 0.03
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9575757575757575,
5
+ "eval_steps": 500,
6
+ "global_step": 183,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16161616161616163,
13
+ "grad_norm": 6.166819605908173,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.5239,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.32323232323232326,
20
+ "grad_norm": 0.5900922088739049,
21
+ "learning_rate": 5e-06,
22
+ "loss": 0.4554,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.48484848484848486,
27
+ "grad_norm": 0.6225237661644525,
28
+ "learning_rate": 5e-06,
29
+ "loss": 0.4328,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.6464646464646465,
34
+ "grad_norm": 0.7157838630554848,
35
+ "learning_rate": 5e-06,
36
+ "loss": 0.4187,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.8080808080808081,
41
+ "grad_norm": 0.5140317827695708,
42
+ "learning_rate": 5e-06,
43
+ "loss": 0.4144,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.9696969696969697,
48
+ "grad_norm": 0.392407636237434,
49
+ "learning_rate": 5e-06,
50
+ "loss": 0.4052,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.9858585858585859,
55
+ "eval_loss": 0.41120877861976624,
56
+ "eval_runtime": 44.0611,
57
+ "eval_samples_per_second": 37.834,
58
+ "eval_steps_per_second": 0.613,
59
+ "step": 61
60
+ },
61
+ {
62
+ "epoch": 1.1313131313131313,
63
+ "grad_norm": 0.4462073119339811,
64
+ "learning_rate": 5e-06,
65
+ "loss": 0.3938,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 1.2929292929292928,
70
+ "grad_norm": 0.4719593740637459,
71
+ "learning_rate": 5e-06,
72
+ "loss": 0.3746,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 1.4545454545454546,
77
+ "grad_norm": 0.3870857604814084,
78
+ "learning_rate": 5e-06,
79
+ "loss": 0.3748,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 1.6161616161616161,
84
+ "grad_norm": 0.4809774445566907,
85
+ "learning_rate": 5e-06,
86
+ "loss": 0.3751,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 1.7777777777777777,
91
+ "grad_norm": 0.3871389438459577,
92
+ "learning_rate": 5e-06,
93
+ "loss": 0.3737,
94
+ "step": 110
95
+ },
96
+ {
97
+ "epoch": 1.9393939393939394,
98
+ "grad_norm": 0.4244656775249173,
99
+ "learning_rate": 5e-06,
100
+ "loss": 0.3753,
101
+ "step": 120
102
+ },
103
+ {
104
+ "epoch": 1.9878787878787878,
105
+ "eval_loss": 0.3987608253955841,
106
+ "eval_runtime": 46.45,
107
+ "eval_samples_per_second": 35.888,
108
+ "eval_steps_per_second": 0.581,
109
+ "step": 123
110
+ },
111
+ {
112
+ "epoch": 2.101010101010101,
113
+ "grad_norm": 0.7823936552473925,
114
+ "learning_rate": 5e-06,
115
+ "loss": 0.36,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 2.2626262626262625,
120
+ "grad_norm": 0.41729734619946546,
121
+ "learning_rate": 5e-06,
122
+ "loss": 0.3487,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 2.4242424242424243,
127
+ "grad_norm": 0.6202086141424988,
128
+ "learning_rate": 5e-06,
129
+ "loss": 0.348,
130
+ "step": 150
131
+ },
132
+ {
133
+ "epoch": 2.5858585858585856,
134
+ "grad_norm": 0.7937253942964878,
135
+ "learning_rate": 5e-06,
136
+ "loss": 0.3508,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 2.7474747474747474,
141
+ "grad_norm": 0.38375331802783846,
142
+ "learning_rate": 5e-06,
143
+ "loss": 0.3486,
144
+ "step": 170
145
+ },
146
+ {
147
+ "epoch": 2.909090909090909,
148
+ "grad_norm": 0.41424447785006596,
149
+ "learning_rate": 5e-06,
150
+ "loss": 0.3467,
151
+ "step": 180
152
+ },
153
+ {
154
+ "epoch": 2.9575757575757575,
155
+ "eval_loss": 0.3988819718360901,
156
+ "eval_runtime": 43.7285,
157
+ "eval_samples_per_second": 38.122,
158
+ "eval_steps_per_second": 0.617,
159
+ "step": 183
160
+ },
161
+ {
162
+ "epoch": 2.9575757575757575,
163
+ "step": 183,
164
+ "total_flos": 306322436259840.0,
165
+ "train_loss": 0.3892440470189996,
166
+ "train_runtime": 6166.0025,
167
+ "train_samples_per_second": 15.409,
168
+ "train_steps_per_second": 0.03
169
+ }
170
+ ],
171
+ "logging_steps": 10,
172
+ "max_steps": 183,
173
+ "num_input_tokens_seen": 0,
174
+ "num_train_epochs": 3,
175
+ "save_steps": 500,
176
+ "stateful_callbacks": {
177
+ "TrainerControl": {
178
+ "args": {
179
+ "should_epoch_stop": false,
180
+ "should_evaluate": false,
181
+ "should_log": false,
182
+ "should_save": true,
183
+ "should_training_stop": true
184
+ },
185
+ "attributes": {}
186
+ }
187
+ },
188
+ "total_flos": 306322436259840.0,
189
+ "train_batch_size": 8,
190
+ "trial_name": null,
191
+ "trial_params": null
192
+ }
training_eval_loss.png ADDED
training_loss.png ADDED