SystemAdmin123 committed on
Commit
5146ac9
·
verified ·
1 Parent(s): 53359c7

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c7f2f332787e46fd7e5b4bfc3ab7da69e6670f1e6feff08bdf9008ef8c58b44
3
  size 4988025760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c83a0dc846d32f7d7a3d780fe4a230d16ee0e139636eebdf8f49d19cd998205c
3
  size 4988025760
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7074557b22dbdac5bcaaaa27ad7a16f0b87a8a3db0e2c3a5b2b4aeb4cdc0b591
3
  size 240691728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a34ed505e907c9e0a1dbecda8d5d6f310b44db3b745a0e73ac6aa6b3977ed6f
3
  size 240691728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37303399386e49ea69f1fff77632cfb6cb34c03ba45651cda3244cf99c030874
3
  size 5312107146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee781a3efb3210dceca33646beddc15e4531fff8487306a1a730d5abf521ce28
3
  size 5312107146
last-checkpoint/trainer_state.json CHANGED
@@ -11,157 +11,157 @@
11
  {
12
  "epoch": 0.03225806451612903,
13
  "eval_loss": 2.5740277767181396,
14
- "eval_runtime": 31.6988,
15
- "eval_samples_per_second": 47.352,
16
- "eval_steps_per_second": 1.987,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.3225806451612903,
21
- "grad_norm": 2.609375,
22
  "learning_rate": 8e-05,
23
- "loss": 1.99,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.6451612903225806,
28
- "grad_norm": 2.421875,
29
  "learning_rate": 0.00016,
30
- "loss": 2.0933,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 0.967741935483871,
35
- "grad_norm": 2.375,
36
  "learning_rate": 0.00019994532573409262,
37
- "loss": 2.3015,
38
  "step": 30
39
  },
40
  {
41
  "epoch": 1.2903225806451613,
42
- "grad_norm": 2.546875,
43
  "learning_rate": 0.00019950829025450114,
44
- "loss": 1.3956,
45
  "step": 40
46
  },
47
  {
48
  "epoch": 1.6129032258064515,
49
- "grad_norm": 2.25,
50
  "learning_rate": 0.00019863613034027224,
51
- "loss": 1.2746,
52
  "step": 50
53
  },
54
  {
55
  "epoch": 1.935483870967742,
56
- "grad_norm": 2.265625,
57
  "learning_rate": 0.0001973326597248006,
58
- "loss": 1.3742,
59
  "step": 60
60
  },
61
  {
62
  "epoch": 2.258064516129032,
63
- "grad_norm": 2.140625,
64
  "learning_rate": 0.00019560357815343577,
65
- "loss": 0.8139,
66
  "step": 70
67
  },
68
  {
69
  "epoch": 2.5806451612903225,
70
- "grad_norm": 2.109375,
71
  "learning_rate": 0.0001934564464599461,
72
- "loss": 0.7252,
73
  "step": 80
74
  },
75
  {
76
  "epoch": 2.903225806451613,
77
- "grad_norm": 2.078125,
78
  "learning_rate": 0.00019090065350491626,
79
- "loss": 0.8052,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 3.225806451612903,
84
- "grad_norm": 1.515625,
85
  "learning_rate": 0.0001879473751206489,
86
- "loss": 0.4985,
87
  "step": 100
88
  },
89
  {
90
  "epoch": 3.5483870967741935,
91
- "grad_norm": 2.640625,
92
  "learning_rate": 0.00018460952524209355,
93
- "loss": 0.4285,
94
  "step": 110
95
  },
96
  {
97
  "epoch": 3.870967741935484,
98
- "grad_norm": 1.765625,
99
  "learning_rate": 0.00018090169943749476,
100
- "loss": 0.4885,
101
  "step": 120
102
  },
103
  {
104
  "epoch": 4.193548387096774,
105
- "grad_norm": 1.34375,
106
  "learning_rate": 0.00017684011108568592,
107
- "loss": 0.3478,
108
  "step": 130
109
  },
110
  {
111
  "epoch": 4.516129032258064,
112
- "grad_norm": 3.390625,
113
  "learning_rate": 0.00017244252047910892,
114
- "loss": 0.2943,
115
  "step": 140
116
  },
117
  {
118
  "epoch": 4.838709677419355,
119
- "grad_norm": 1.4296875,
120
  "learning_rate": 0.00016772815716257412,
121
- "loss": 0.3338,
122
  "step": 150
123
  },
124
  {
125
  "epoch": 5.161290322580645,
126
- "grad_norm": 1.3984375,
127
  "learning_rate": 0.0001627176358473537,
128
- "loss": 0.2627,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 5.483870967741936,
133
  "grad_norm": 1.1875,
134
  "learning_rate": 0.00015743286626829437,
135
- "loss": 0.1906,
136
  "step": 170
137
  },
138
  {
139
  "epoch": 5.806451612903226,
140
- "grad_norm": 1.2578125,
141
  "learning_rate": 0.00015189695737812152,
142
- "loss": 0.216,
143
  "step": 180
144
  },
145
  {
146
  "epoch": 6.129032258064516,
147
- "grad_norm": 0.921875,
148
  "learning_rate": 0.0001461341162978688,
149
- "loss": 0.1814,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 6.451612903225806,
154
- "grad_norm": 0.9921875,
155
  "learning_rate": 0.00014016954246529696,
156
- "loss": 0.1305,
157
  "step": 200
158
  },
159
  {
160
  "epoch": 6.451612903225806,
161
- "eval_loss": 4.707674980163574,
162
- "eval_runtime": 32.054,
163
- "eval_samples_per_second": 46.827,
164
- "eval_steps_per_second": 1.965,
165
  "step": 200
166
  }
167
  ],
 
11
  {
12
  "epoch": 0.03225806451612903,
13
  "eval_loss": 2.5740277767181396,
14
+ "eval_runtime": 31.6195,
15
+ "eval_samples_per_second": 47.471,
16
+ "eval_steps_per_second": 1.992,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.3225806451612903,
21
+ "grad_norm": 2.625,
22
  "learning_rate": 8e-05,
23
+ "loss": 1.9901,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.6451612903225806,
28
+ "grad_norm": 2.28125,
29
  "learning_rate": 0.00016,
30
+ "loss": 2.0923,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 0.967741935483871,
35
+ "grad_norm": 2.5625,
36
  "learning_rate": 0.00019994532573409262,
37
+ "loss": 2.3185,
38
  "step": 30
39
  },
40
  {
41
  "epoch": 1.2903225806451613,
42
+ "grad_norm": 2.03125,
43
  "learning_rate": 0.00019950829025450114,
44
+ "loss": 1.3841,
45
  "step": 40
46
  },
47
  {
48
  "epoch": 1.6129032258064515,
49
+ "grad_norm": 2.28125,
50
  "learning_rate": 0.00019863613034027224,
51
+ "loss": 1.2813,
52
  "step": 50
53
  },
54
  {
55
  "epoch": 1.935483870967742,
56
+ "grad_norm": 2.15625,
57
  "learning_rate": 0.0001973326597248006,
58
+ "loss": 1.3828,
59
  "step": 60
60
  },
61
  {
62
  "epoch": 2.258064516129032,
63
+ "grad_norm": 2.046875,
64
  "learning_rate": 0.00019560357815343577,
65
+ "loss": 0.8247,
66
  "step": 70
67
  },
68
  {
69
  "epoch": 2.5806451612903225,
70
+ "grad_norm": 2.140625,
71
  "learning_rate": 0.0001934564464599461,
72
+ "loss": 0.7192,
73
  "step": 80
74
  },
75
  {
76
  "epoch": 2.903225806451613,
77
+ "grad_norm": 2.109375,
78
  "learning_rate": 0.00019090065350491626,
79
+ "loss": 0.7919,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 3.225806451612903,
84
+ "grad_norm": 1.53125,
85
  "learning_rate": 0.0001879473751206489,
86
+ "loss": 0.4979,
87
  "step": 100
88
  },
89
  {
90
  "epoch": 3.5483870967741935,
91
+ "grad_norm": 2.140625,
92
  "learning_rate": 0.00018460952524209355,
93
+ "loss": 0.435,
94
  "step": 110
95
  },
96
  {
97
  "epoch": 3.870967741935484,
98
+ "grad_norm": 1.6953125,
99
  "learning_rate": 0.00018090169943749476,
100
+ "loss": 0.4868,
101
  "step": 120
102
  },
103
  {
104
  "epoch": 4.193548387096774,
105
+ "grad_norm": 1.328125,
106
  "learning_rate": 0.00017684011108568592,
107
+ "loss": 0.3416,
108
  "step": 130
109
  },
110
  {
111
  "epoch": 4.516129032258064,
112
+ "grad_norm": 2.0,
113
  "learning_rate": 0.00017244252047910892,
114
+ "loss": 0.29,
115
  "step": 140
116
  },
117
  {
118
  "epoch": 4.838709677419355,
119
+ "grad_norm": 1.4453125,
120
  "learning_rate": 0.00016772815716257412,
121
+ "loss": 0.3182,
122
  "step": 150
123
  },
124
  {
125
  "epoch": 5.161290322580645,
126
+ "grad_norm": 1.234375,
127
  "learning_rate": 0.0001627176358473537,
128
+ "loss": 0.2422,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 5.483870967741936,
133
  "grad_norm": 1.1875,
134
  "learning_rate": 0.00015743286626829437,
135
+ "loss": 0.1847,
136
  "step": 170
137
  },
138
  {
139
  "epoch": 5.806451612903226,
140
+ "grad_norm": 1.234375,
141
  "learning_rate": 0.00015189695737812152,
142
+ "loss": 0.2181,
143
  "step": 180
144
  },
145
  {
146
  "epoch": 6.129032258064516,
147
+ "grad_norm": 1.03125,
148
  "learning_rate": 0.0001461341162978688,
149
+ "loss": 0.1774,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 6.451612903225806,
154
+ "grad_norm": 1.03125,
155
  "learning_rate": 0.00014016954246529696,
156
+ "loss": 0.1334,
157
  "step": 200
158
  },
159
  {
160
  "epoch": 6.451612903225806,
161
+ "eval_loss": 4.735217571258545,
162
+ "eval_runtime": 31.1163,
163
+ "eval_samples_per_second": 48.238,
164
+ "eval_steps_per_second": 2.025,
165
  "step": 200
166
  }
167
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16019f9ad32f51ad10b2ada2a95135cbd7d155d3f50ced87b24386e937a6ea29
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17c73604510f9212fcca9673c3d2b9005a05f81f6edff7cdd4512ba6b5a571c
3
  size 6840