gsmyrnis committed on
Commit
b56f2df
·
verified ·
1 Parent(s): cf589df

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ license: llama3.1
4
  base_model: meta-llama/Meta-Llama-3.1-8B
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: llama3-1_8b_dolphin
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # llama3-1_8b_dolphin
17
 
18
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.5015
21
 
 
4
  base_model: meta-llama/Meta-Llama-3.1-8B
5
  tags:
6
  - llama-factory
7
+ - full
8
  - generated_from_trainer
9
  model-index:
10
  - name: llama3-1_8b_dolphin
 
16
 
17
  # llama3-1_8b_dolphin
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/dolphin dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.5015
22
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5014501214027405,
4
+ "eval_runtime": 6.4572,
5
+ "eval_samples_per_second": 264.664,
6
+ "eval_steps_per_second": 1.084,
7
+ "total_flos": 321607151124480.0,
8
+ "train_loss": 0.5088142100721598,
9
+ "train_runtime": 1474.9936,
10
+ "train_samples_per_second": 66.021,
11
+ "train_steps_per_second": 0.13
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 0.5014501214027405,
4
+ "eval_runtime": 6.4572,
5
+ "eval_samples_per_second": 264.664,
6
+ "eval_steps_per_second": 1.084
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 321607151124480.0,
4
+ "train_loss": 0.5088142100721598,
5
+ "train_runtime": 1474.9936,
6
+ "train_samples_per_second": 66.021,
7
+ "train_steps_per_second": 0.13
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 192,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.15625,
13
+ "grad_norm": 3.410148832715147,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.7255,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.3125,
20
+ "grad_norm": 1.9658070319836614,
21
+ "learning_rate": 5e-06,
22
+ "loss": 0.6224,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.46875,
27
+ "grad_norm": 2.0869328497874875,
28
+ "learning_rate": 5e-06,
29
+ "loss": 0.5848,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.625,
34
+ "grad_norm": 1.2748507240351463,
35
+ "learning_rate": 5e-06,
36
+ "loss": 0.5638,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.78125,
41
+ "grad_norm": 0.8013309231484568,
42
+ "learning_rate": 5e-06,
43
+ "loss": 0.5486,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.9375,
48
+ "grad_norm": 0.8121748600070269,
49
+ "learning_rate": 5e-06,
50
+ "loss": 0.5351,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 1.0,
55
+ "eval_loss": 0.5265702605247498,
56
+ "eval_runtime": 6.4953,
57
+ "eval_samples_per_second": 263.115,
58
+ "eval_steps_per_second": 1.078,
59
+ "step": 64
60
+ },
61
+ {
62
+ "epoch": 1.09375,
63
+ "grad_norm": 0.6230488954615538,
64
+ "learning_rate": 5e-06,
65
+ "loss": 0.5117,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 1.25,
70
+ "grad_norm": 0.6381342635296321,
71
+ "learning_rate": 5e-06,
72
+ "loss": 0.4927,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 1.40625,
77
+ "grad_norm": 0.5885474331678158,
78
+ "learning_rate": 5e-06,
79
+ "loss": 0.488,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 1.5625,
84
+ "grad_norm": 0.517571001381851,
85
+ "learning_rate": 5e-06,
86
+ "loss": 0.4873,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 1.71875,
91
+ "grad_norm": 0.4914014029041883,
92
+ "learning_rate": 5e-06,
93
+ "loss": 0.4868,
94
+ "step": 110
95
+ },
96
+ {
97
+ "epoch": 1.875,
98
+ "grad_norm": 0.5996088431576516,
99
+ "learning_rate": 5e-06,
100
+ "loss": 0.483,
101
+ "step": 120
102
+ },
103
+ {
104
+ "epoch": 2.0,
105
+ "eval_loss": 0.5022982358932495,
106
+ "eval_runtime": 6.4313,
107
+ "eval_samples_per_second": 265.73,
108
+ "eval_steps_per_second": 1.088,
109
+ "step": 128
110
+ },
111
+ {
112
+ "epoch": 2.03125,
113
+ "grad_norm": 0.9648708540633031,
114
+ "learning_rate": 5e-06,
115
+ "loss": 0.4743,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 2.1875,
120
+ "grad_norm": 0.641069504623041,
121
+ "learning_rate": 5e-06,
122
+ "loss": 0.4464,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 2.34375,
127
+ "grad_norm": 0.5277740771835514,
128
+ "learning_rate": 5e-06,
129
+ "loss": 0.4484,
130
+ "step": 150
131
+ },
132
+ {
133
+ "epoch": 2.5,
134
+ "grad_norm": 0.5645101097649855,
135
+ "learning_rate": 5e-06,
136
+ "loss": 0.4458,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 2.65625,
141
+ "grad_norm": 0.6429640058172255,
142
+ "learning_rate": 5e-06,
143
+ "loss": 0.4485,
144
+ "step": 170
145
+ },
146
+ {
147
+ "epoch": 2.8125,
148
+ "grad_norm": 0.4995911800780122,
149
+ "learning_rate": 5e-06,
150
+ "loss": 0.4471,
151
+ "step": 180
152
+ },
153
+ {
154
+ "epoch": 2.96875,
155
+ "grad_norm": 0.6882039357462453,
156
+ "learning_rate": 5e-06,
157
+ "loss": 0.4461,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 3.0,
162
+ "eval_loss": 0.5014501214027405,
163
+ "eval_runtime": 6.2961,
164
+ "eval_samples_per_second": 271.439,
165
+ "eval_steps_per_second": 1.112,
166
+ "step": 192
167
+ },
168
+ {
169
+ "epoch": 3.0,
170
+ "step": 192,
171
+ "total_flos": 321607151124480.0,
172
+ "train_loss": 0.5088142100721598,
173
+ "train_runtime": 1474.9936,
174
+ "train_samples_per_second": 66.021,
175
+ "train_steps_per_second": 0.13
176
+ }
177
+ ],
178
+ "logging_steps": 10,
179
+ "max_steps": 192,
180
+ "num_input_tokens_seen": 0,
181
+ "num_train_epochs": 3,
182
+ "save_steps": 500,
183
+ "stateful_callbacks": {
184
+ "TrainerControl": {
185
+ "args": {
186
+ "should_epoch_stop": false,
187
+ "should_evaluate": false,
188
+ "should_log": false,
189
+ "should_save": true,
190
+ "should_training_stop": true
191
+ },
192
+ "attributes": {}
193
+ }
194
+ },
195
+ "total_flos": 321607151124480.0,
196
+ "train_batch_size": 16,
197
+ "trial_name": null,
198
+ "trial_params": null
199
+ }
training_eval_loss.png ADDED
training_loss.png ADDED