mnpham committed on
Commit
697e236
·
verified ·
1 Parent(s): 1e04a2d

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +9 -9
  3. train_results.json +9 -9
  4. trainer_state.json +52 -52
  5. training_loss.png +0 -0
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # freelancer-projects-1k-traces
18
 
19
- This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on an unknown dataset.
20
 
21
  ## Model description
22
 
 
16
 
17
  # freelancer-projects-1k-traces
18
 
19
+ This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the DCAgent/freelancer-projects-1k-traces dataset.
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "achieved_tflops_per_gpu": 12.363374282190918,
3
- "achieved_tflops_per_gpu_theoretical": 54.206473005170686,
4
  "epoch": 5.0,
5
  "loss_nan_ranks": 0,
6
- "loss_rank_avg": 0.0642264187335968,
7
- "mfu_percent": 1.2500884006259776,
8
- "mfu_percent_theoretical": 5.480937614274083,
9
  "total_flos": 2.548584738193408e+17,
10
- "train_loss": 0.5736305087804794,
11
- "train_runtime": 5153.4975,
12
- "train_samples_per_second": 0.97,
13
- "train_steps_per_second": 0.016,
14
  "valid_targets_mean": 8112.2,
15
  "valid_targets_min": 6354
16
  }
 
1
  {
2
+ "achieved_tflops_per_gpu": 12.2732109701098,
3
+ "achieved_tflops_per_gpu_theoretical": 50.81856844234751,
4
  "epoch": 5.0,
5
  "loss_nan_ranks": 0,
6
+ "loss_rank_avg": 0.06411445885896683,
7
+ "mfu_percent": 1.240971786664287,
8
+ "mfu_percent_theoretical": 5.138379013381953,
9
  "total_flos": 2.548584738193408e+17,
10
+ "train_loss": 0.5731773316860199,
11
+ "train_runtime": 5191.3569,
12
+ "train_samples_per_second": 0.963,
13
+ "train_steps_per_second": 0.015,
14
  "valid_targets_mean": 8112.2,
15
  "valid_targets_min": 6354
16
  }
train_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "achieved_tflops_per_gpu": 12.363374282190918,
3
- "achieved_tflops_per_gpu_theoretical": 54.206473005170686,
4
  "epoch": 5.0,
5
  "loss_nan_ranks": 0,
6
- "loss_rank_avg": 0.0642264187335968,
7
- "mfu_percent": 1.2500884006259776,
8
- "mfu_percent_theoretical": 5.480937614274083,
9
  "total_flos": 2.548584738193408e+17,
10
- "train_loss": 0.5736305087804794,
11
- "train_runtime": 5153.4975,
12
- "train_samples_per_second": 0.97,
13
- "train_steps_per_second": 0.016,
14
  "valid_targets_mean": 8112.2,
15
  "valid_targets_min": 6354
16
  }
 
1
  {
2
+ "achieved_tflops_per_gpu": 12.2732109701098,
3
+ "achieved_tflops_per_gpu_theoretical": 50.81856844234751,
4
  "epoch": 5.0,
5
  "loss_nan_ranks": 0,
6
+ "loss_rank_avg": 0.06411445885896683,
7
+ "mfu_percent": 1.240971786664287,
8
+ "mfu_percent_theoretical": 5.138379013381953,
9
  "total_flos": 2.548584738193408e+17,
10
+ "train_loss": 0.5731773316860199,
11
+ "train_runtime": 5191.3569,
12
+ "train_samples_per_second": 0.963,
13
+ "train_steps_per_second": 0.015,
14
  "valid_targets_mean": 8112.2,
15
  "valid_targets_min": 6354
16
  }
trainer_state.json CHANGED
@@ -11,176 +11,176 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.32,
14
- "grad_norm": 3.003118567620921,
15
  "learning_rate": 2e-05,
16
  "loss": 0.7921,
17
  "loss_nan_ranks": 0,
18
- "loss_rank_avg": 0.04151468724012375,
19
  "step": 5,
20
  "valid_targets_mean": 4567.2,
21
  "valid_targets_min": 2907
22
  },
23
  {
24
  "epoch": 0.64,
25
- "grad_norm": 0.9808687056622315,
26
  "learning_rate": 3.998096443163716e-05,
27
- "loss": 0.6914,
28
  "loss_nan_ranks": 0,
29
- "loss_rank_avg": 0.037822432816028595,
30
  "step": 10,
31
  "valid_targets_mean": 4694.8,
32
  "valid_targets_min": 1740
33
  },
34
  {
35
  "epoch": 0.96,
36
- "grad_norm": 0.523224658733824,
37
  "learning_rate": 3.931851652578137e-05,
38
- "loss": 0.634,
39
  "loss_nan_ranks": 0,
40
- "loss_rank_avg": 0.03384334594011307,
41
  "step": 15,
42
  "valid_targets_mean": 6716.5,
43
  "valid_targets_min": 3779
44
  },
45
  {
46
  "epoch": 1.256,
47
- "grad_norm": 0.37294181709909296,
48
  "learning_rate": 3.774021666356444e-05,
49
- "loss": 0.5937,
50
  "loss_nan_ranks": 0,
51
- "loss_rank_avg": 0.03365647792816162,
52
  "step": 20,
53
  "valid_targets_mean": 4429.5,
54
  "valid_targets_min": 1506
55
  },
56
  {
57
  "epoch": 1.576,
58
- "grad_norm": 0.312916764359895,
59
  "learning_rate": 3.532088886237956e-05,
60
- "loss": 0.5791,
61
  "loss_nan_ranks": 0,
62
- "loss_rank_avg": 0.030184874311089516,
63
  "step": 25,
64
  "valid_targets_mean": 6942.8,
65
  "valid_targets_min": 5230
66
  },
67
  {
68
  "epoch": 1.896,
69
- "grad_norm": 0.2963842677893648,
70
  "learning_rate": 3.217522858017442e-05,
71
- "loss": 0.5804,
72
  "loss_nan_ranks": 0,
73
- "loss_rank_avg": 0.02675670199096203,
74
  "step": 30,
75
  "valid_targets_mean": 3804.5,
76
  "valid_targets_min": 1421
77
  },
78
  {
79
  "epoch": 2.192,
80
- "grad_norm": 0.26474800680849037,
81
  "learning_rate": 2.8452365234813992e-05,
82
- "loss": 0.5691,
83
  "loss_nan_ranks": 0,
84
- "loss_rank_avg": 0.031951915472745895,
85
  "step": 35,
86
  "valid_targets_mean": 4945.2,
87
  "valid_targets_min": 929
88
  },
89
  {
90
  "epoch": 2.512,
91
- "grad_norm": 0.24415095759699768,
92
  "learning_rate": 2.4328792278762058e-05,
93
- "loss": 0.5313,
94
  "loss_nan_ranks": 0,
95
- "loss_rank_avg": 0.030482709407806396,
96
  "step": 40,
97
  "valid_targets_mean": 5575.0,
98
  "valid_targets_min": 3849
99
  },
100
  {
101
  "epoch": 2.832,
102
- "grad_norm": 0.2374854041894097,
103
  "learning_rate": 2e-05,
104
- "loss": 0.5488,
105
  "loss_nan_ranks": 0,
106
- "loss_rank_avg": 0.03602883964776993,
107
  "step": 45,
108
  "valid_targets_mean": 5869.8,
109
  "valid_targets_min": 3374
110
  },
111
  {
112
  "epoch": 3.128,
113
- "grad_norm": 0.2151479244733432,
114
  "learning_rate": 1.5671207721237945e-05,
115
- "loss": 0.5284,
116
  "loss_nan_ranks": 0,
117
- "loss_rank_avg": 0.04070408642292023,
118
  "step": 50,
119
  "valid_targets_mean": 8575.2,
120
  "valid_targets_min": 5225
121
  },
122
  {
123
  "epoch": 3.448,
124
- "grad_norm": 0.21832773258775734,
125
  "learning_rate": 1.1547634765186016e-05,
126
- "loss": 0.5323,
127
  "loss_nan_ranks": 0,
128
- "loss_rank_avg": 0.029450973495841026,
129
  "step": 55,
130
  "valid_targets_mean": 6379.2,
131
  "valid_targets_min": 1888
132
  },
133
  {
134
  "epoch": 3.768,
135
- "grad_norm": 0.24241830780053505,
136
  "learning_rate": 7.824771419825588e-06,
137
- "loss": 0.53,
138
  "loss_nan_ranks": 0,
139
- "loss_rank_avg": 0.03408423066139221,
140
  "step": 60,
141
  "valid_targets_mean": 5950.8,
142
  "valid_targets_min": 4866
143
  },
144
  {
145
  "epoch": 4.064,
146
- "grad_norm": 0.2119691471196956,
147
  "learning_rate": 4.679111137620442e-06,
148
- "loss": 0.5101,
149
  "loss_nan_ranks": 0,
150
- "loss_rank_avg": 0.027629852294921875,
151
  "step": 65,
152
  "valid_targets_mean": 5062.5,
153
  "valid_targets_min": 4995
154
  },
155
  {
156
  "epoch": 4.384,
157
- "grad_norm": 0.2243043904767433,
158
  "learning_rate": 2.259783336435566e-06,
159
- "loss": 0.5329,
160
  "loss_nan_ranks": 0,
161
- "loss_rank_avg": 0.04041476547718048,
162
  "step": 70,
163
  "valid_targets_mean": 6195.2,
164
  "valid_targets_min": 5413
165
  },
166
  {
167
  "epoch": 4.704,
168
- "grad_norm": 0.2288274719846025,
169
  "learning_rate": 6.814834742186361e-07,
170
- "loss": 0.5152,
171
  "loss_nan_ranks": 0,
172
- "loss_rank_avg": 0.029502740129828453,
173
  "step": 75,
174
  "valid_targets_mean": 5352.2,
175
  "valid_targets_min": 4831
176
  },
177
  {
178
  "epoch": 5.0,
179
- "grad_norm": 0.28538716529854286,
180
  "learning_rate": 1.9035568362844037e-08,
181
- "loss": 0.5091,
182
  "loss_nan_ranks": 0,
183
- "loss_rank_avg": 0.0642264187335968,
184
  "step": 80,
185
  "valid_targets_mean": 8112.2,
186
  "valid_targets_min": 6354
@@ -188,13 +188,13 @@
188
  {
189
  "epoch": 5.0,
190
  "loss_nan_ranks": 0,
191
- "loss_rank_avg": 0.0642264187335968,
192
  "step": 80,
193
  "total_flos": 2.548584738193408e+17,
194
- "train_loss": 0.5736305087804794,
195
- "train_runtime": 5153.4975,
196
- "train_samples_per_second": 0.97,
197
- "train_steps_per_second": 0.016,
198
  "valid_targets_mean": 8112.2,
199
  "valid_targets_min": 6354
200
  }
 
11
  "log_history": [
12
  {
13
  "epoch": 0.32,
14
+ "grad_norm": 2.9994249605525956,
15
  "learning_rate": 2e-05,
16
  "loss": 0.7921,
17
  "loss_nan_ranks": 0,
18
+ "loss_rank_avg": 0.041493918746709824,
19
  "step": 5,
20
  "valid_targets_mean": 4567.2,
21
  "valid_targets_min": 2907
22
  },
23
  {
24
  "epoch": 0.64,
25
+ "grad_norm": 0.9708943876218247,
26
  "learning_rate": 3.998096443163716e-05,
27
+ "loss": 0.6915,
28
  "loss_nan_ranks": 0,
29
+ "loss_rank_avg": 0.03782758116722107,
30
  "step": 10,
31
  "valid_targets_mean": 4694.8,
32
  "valid_targets_min": 1740
33
  },
34
  {
35
  "epoch": 0.96,
36
+ "grad_norm": 0.5233591627349731,
37
  "learning_rate": 3.931851652578137e-05,
38
+ "loss": 0.6339,
39
  "loss_nan_ranks": 0,
40
+ "loss_rank_avg": 0.03382924944162369,
41
  "step": 15,
42
  "valid_targets_mean": 6716.5,
43
  "valid_targets_min": 3779
44
  },
45
  {
46
  "epoch": 1.256,
47
+ "grad_norm": 0.3736995715721388,
48
  "learning_rate": 3.774021666356444e-05,
49
+ "loss": 0.5936,
50
  "loss_nan_ranks": 0,
51
+ "loss_rank_avg": 0.03365553170442581,
52
  "step": 20,
53
  "valid_targets_mean": 4429.5,
54
  "valid_targets_min": 1506
55
  },
56
  {
57
  "epoch": 1.576,
58
+ "grad_norm": 0.3088460905245533,
59
  "learning_rate": 3.532088886237956e-05,
60
+ "loss": 0.5789,
61
  "loss_nan_ranks": 0,
62
+ "loss_rank_avg": 0.03015170432627201,
63
  "step": 25,
64
  "valid_targets_mean": 6942.8,
65
  "valid_targets_min": 5230
66
  },
67
  {
68
  "epoch": 1.896,
69
+ "grad_norm": 0.2794462666270456,
70
  "learning_rate": 3.217522858017442e-05,
71
+ "loss": 0.5801,
72
  "loss_nan_ranks": 0,
73
+ "loss_rank_avg": 0.026743754744529724,
74
  "step": 30,
75
  "valid_targets_mean": 3804.5,
76
  "valid_targets_min": 1421
77
  },
78
  {
79
  "epoch": 2.192,
80
+ "grad_norm": 0.2553542474949641,
81
  "learning_rate": 2.8452365234813992e-05,
82
+ "loss": 0.5686,
83
  "loss_nan_ranks": 0,
84
+ "loss_rank_avg": 0.03189000487327576,
85
  "step": 35,
86
  "valid_targets_mean": 4945.2,
87
  "valid_targets_min": 929
88
  },
89
  {
90
  "epoch": 2.512,
91
+ "grad_norm": 0.24170921030597534,
92
  "learning_rate": 2.4328792278762058e-05,
93
+ "loss": 0.5308,
94
  "loss_nan_ranks": 0,
95
+ "loss_rank_avg": 0.03042096272110939,
96
  "step": 40,
97
  "valid_targets_mean": 5575.0,
98
  "valid_targets_min": 3849
99
  },
100
  {
101
  "epoch": 2.832,
102
+ "grad_norm": 0.25636693364304236,
103
  "learning_rate": 2e-05,
104
+ "loss": 0.5482,
105
  "loss_nan_ranks": 0,
106
+ "loss_rank_avg": 0.03599570691585541,
107
  "step": 45,
108
  "valid_targets_mean": 5869.8,
109
  "valid_targets_min": 3374
110
  },
111
  {
112
  "epoch": 3.128,
113
+ "grad_norm": 0.20983492822310348,
114
  "learning_rate": 1.5671207721237945e-05,
115
+ "loss": 0.5278,
116
  "loss_nan_ranks": 0,
117
+ "loss_rank_avg": 0.04059098660945892,
118
  "step": 50,
119
  "valid_targets_mean": 8575.2,
120
  "valid_targets_min": 5225
121
  },
122
  {
123
  "epoch": 3.448,
124
+ "grad_norm": 0.2172073228470424,
125
  "learning_rate": 1.1547634765186016e-05,
126
+ "loss": 0.5317,
127
  "loss_nan_ranks": 0,
128
+ "loss_rank_avg": 0.029378097504377365,
129
  "step": 55,
130
  "valid_targets_mean": 6379.2,
131
  "valid_targets_min": 1888
132
  },
133
  {
134
  "epoch": 3.768,
135
+ "grad_norm": 0.21524199887805276,
136
  "learning_rate": 7.824771419825588e-06,
137
+ "loss": 0.5294,
138
  "loss_nan_ranks": 0,
139
+ "loss_rank_avg": 0.034020863473415375,
140
  "step": 60,
141
  "valid_targets_mean": 5950.8,
142
  "valid_targets_min": 4866
143
  },
144
  {
145
  "epoch": 4.064,
146
+ "grad_norm": 0.20705060345575096,
147
  "learning_rate": 4.679111137620442e-06,
148
+ "loss": 0.5094,
149
  "loss_nan_ranks": 0,
150
+ "loss_rank_avg": 0.027570832520723343,
151
  "step": 65,
152
  "valid_targets_mean": 5062.5,
153
  "valid_targets_min": 4995
154
  },
155
  {
156
  "epoch": 4.384,
157
+ "grad_norm": 0.2251847775205461,
158
  "learning_rate": 2.259783336435566e-06,
159
+ "loss": 0.5321,
160
  "loss_nan_ranks": 0,
161
+ "loss_rank_avg": 0.040328048169612885,
162
  "step": 70,
163
  "valid_targets_mean": 6195.2,
164
  "valid_targets_min": 5413
165
  },
166
  {
167
  "epoch": 4.704,
168
+ "grad_norm": 0.2190226944296032,
169
  "learning_rate": 6.814834742186361e-07,
170
+ "loss": 0.5144,
171
  "loss_nan_ranks": 0,
172
+ "loss_rank_avg": 0.02948196418583393,
173
  "step": 75,
174
  "valid_targets_mean": 5352.2,
175
  "valid_targets_min": 4831
176
  },
177
  {
178
  "epoch": 5.0,
179
+ "grad_norm": 0.2733533274609881,
180
  "learning_rate": 1.9035568362844037e-08,
181
+ "loss": 0.5083,
182
  "loss_nan_ranks": 0,
183
+ "loss_rank_avg": 0.06411445885896683,
184
  "step": 80,
185
  "valid_targets_mean": 8112.2,
186
  "valid_targets_min": 6354
 
188
  {
189
  "epoch": 5.0,
190
  "loss_nan_ranks": 0,
191
+ "loss_rank_avg": 0.06411445885896683,
192
  "step": 80,
193
  "total_flos": 2.548584738193408e+17,
194
+ "train_loss": 0.5731773316860199,
195
+ "train_runtime": 5191.3569,
196
+ "train_samples_per_second": 0.963,
197
+ "train_steps_per_second": 0.015,
198
  "valid_targets_mean": 8112.2,
199
  "valid_targets_min": 6354
200
  }
training_loss.png CHANGED