SystemAdmin123 commited on
Commit
89543b3
·
verified ·
1 Parent(s): d6ae0a6

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b356afc6d082488f2555ef406378069504708e31271d6e086beee3c91f1f5c83
3
  size 662430992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd666fbb2e21f97e8d17488cc5d8a9b18c0a8214c01ae16975129e3ddda35428
3
  size 662430992
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88b27e660c26757696366e20c37f079effc428107f2e9ffa74898f7f9b47361d
3
  size 674384884
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0466bdd6ee79dd151ab52e97ce199c24b36d8ca9bc07fcc401488f453d1a5c9f
3
  size 674384884
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c63e25c94fd32bbfac74e77e235933e40e71931ccdab4688693badf62fc9d895
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b4ffea7d126029ee70aa7566703f287532e95671ece76846e776643564a631e
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42d5a9f1444725574e6e96d7460d2ae867d4c9d4d70a147ad1691b8ce1c4b0b8
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d22d068494c14e8847a8db8a2ed0232120d3dfab2e76b5604ae0a39a1b140a25
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86e844065e2ec1428132da97db98340b7020ef84b112f1abe984badffc1a1d8a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90710af1ca4896b473e6e0eb3fbd88a6b938e5e7b17f9d85f7f48d00f56a79bb
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3774449d16a3fcd1d29aba215bc5841e6b8bc77f3a4c81e9a133c2a3c87bcc8d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcba06152d0f2be800761c223eaba18b4b433f1e651708bc3ad9af02ed0b3614
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 14.285714285714286,
5
- "eval_steps": 20,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -11,119 +11,79 @@
11
  {
12
  "epoch": 0.14285714285714285,
13
  "eval_loss": 3.078927516937256,
14
- "eval_runtime": 4.9541,
15
- "eval_samples_per_second": 302.984,
16
- "eval_steps_per_second": 3.23,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.4285714285714286,
21
- "grad_norm": 9.4375,
22
  "learning_rate": 0.00019863613034027224,
23
- "loss": 6.178,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.857142857142857,
28
- "grad_norm": 5.375,
29
  "learning_rate": 0.0001879473751206489,
30
- "loss": 5.2701,
31
- "step": 20
32
- },
33
- {
34
- "epoch": 2.857142857142857,
35
- "eval_loss": 3.3285892009735107,
36
- "eval_runtime": 5.1935,
37
- "eval_samples_per_second": 289.018,
38
- "eval_steps_per_second": 3.081,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 4.285714285714286,
43
- "grad_norm": 4.21875,
44
  "learning_rate": 0.00016772815716257412,
45
- "loss": 4.6968,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 5.714285714285714,
50
- "grad_norm": 2.421875,
51
  "learning_rate": 0.00014016954246529696,
52
- "loss": 4.3713,
53
- "step": 40
54
- },
55
- {
56
- "epoch": 5.714285714285714,
57
- "eval_loss": 3.2811784744262695,
58
- "eval_runtime": 4.8,
59
- "eval_samples_per_second": 312.707,
60
- "eval_steps_per_second": 3.333,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 7.142857142857143,
65
- "grad_norm": 2.578125,
66
  "learning_rate": 0.00010825793454723325,
67
- "loss": 4.1039,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 8.571428571428571,
72
- "grad_norm": 2.28125,
73
  "learning_rate": 7.54514512859201e-05,
74
- "loss": 3.885,
75
- "step": 60
76
- },
77
- {
78
- "epoch": 8.571428571428571,
79
- "eval_loss": 3.0508906841278076,
80
- "eval_runtime": 4.9305,
81
- "eval_samples_per_second": 304.43,
82
- "eval_steps_per_second": 3.245,
83
  "step": 60
84
  },
85
  {
86
  "epoch": 10.0,
87
- "grad_norm": 2.828125,
88
  "learning_rate": 4.530518418775733e-05,
89
- "loss": 3.7931,
90
  "step": 70
91
  },
92
  {
93
  "epoch": 11.428571428571429,
94
- "grad_norm": 1.5625,
95
  "learning_rate": 2.1085949060360654e-05,
96
- "loss": 3.7326,
97
- "step": 80
98
- },
99
- {
100
- "epoch": 11.428571428571429,
101
- "eval_loss": 3.0470147132873535,
102
- "eval_runtime": 4.9222,
103
- "eval_samples_per_second": 304.944,
104
- "eval_steps_per_second": 3.251,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 12.857142857142858,
109
- "grad_norm": 2.046875,
110
  "learning_rate": 5.418275829936537e-06,
111
- "loss": 3.7097,
112
  "step": 90
113
  },
114
  {
115
  "epoch": 14.285714285714286,
116
- "grad_norm": 1.6171875,
117
  "learning_rate": 0.0,
118
- "loss": 3.6947,
119
- "step": 100
120
- },
121
- {
122
- "epoch": 14.285714285714286,
123
- "eval_loss": 3.0370891094207764,
124
- "eval_runtime": 5.0102,
125
- "eval_samples_per_second": 299.59,
126
- "eval_steps_per_second": 3.194,
127
  "step": 100
128
  }
129
  ],
@@ -131,7 +91,7 @@
131
  "max_steps": 100,
132
  "num_input_tokens_seen": 0,
133
  "num_train_epochs": 15,
134
- "save_steps": 20,
135
  "stateful_callbacks": {
136
  "TrainerControl": {
137
  "args": {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 14.285714285714286,
5
+ "eval_steps": 200,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
11
  {
12
  "epoch": 0.14285714285714285,
13
  "eval_loss": 3.078927516937256,
14
+ "eval_runtime": 4.841,
15
+ "eval_samples_per_second": 310.059,
16
+ "eval_steps_per_second": 3.305,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 1.4285714285714286,
21
+ "grad_norm": 9.25,
22
  "learning_rate": 0.00019863613034027224,
23
+ "loss": 6.1788,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.857142857142857,
28
+ "grad_norm": 5.25,
29
  "learning_rate": 0.0001879473751206489,
30
+ "loss": 5.2684,
 
 
 
 
 
 
 
 
31
  "step": 20
32
  },
33
  {
34
  "epoch": 4.285714285714286,
35
+ "grad_norm": 5.28125,
36
  "learning_rate": 0.00016772815716257412,
37
+ "loss": 4.6887,
38
  "step": 30
39
  },
40
  {
41
  "epoch": 5.714285714285714,
42
+ "grad_norm": 2.3125,
43
  "learning_rate": 0.00014016954246529696,
44
+ "loss": 4.3802,
 
 
 
 
 
 
 
 
45
  "step": 40
46
  },
47
  {
48
  "epoch": 7.142857142857143,
49
+ "grad_norm": 3.484375,
50
  "learning_rate": 0.00010825793454723325,
51
+ "loss": 4.1083,
52
  "step": 50
53
  },
54
  {
55
  "epoch": 8.571428571428571,
56
+ "grad_norm": 2.5625,
57
  "learning_rate": 7.54514512859201e-05,
58
+ "loss": 3.8961,
 
 
 
 
 
 
 
 
59
  "step": 60
60
  },
61
  {
62
  "epoch": 10.0,
63
+ "grad_norm": 2.96875,
64
  "learning_rate": 4.530518418775733e-05,
65
+ "loss": 3.8097,
66
  "step": 70
67
  },
68
  {
69
  "epoch": 11.428571428571429,
70
+ "grad_norm": 1.546875,
71
  "learning_rate": 2.1085949060360654e-05,
72
+ "loss": 3.7435,
 
 
 
 
 
 
 
 
73
  "step": 80
74
  },
75
  {
76
  "epoch": 12.857142857142858,
77
+ "grad_norm": 2.328125,
78
  "learning_rate": 5.418275829936537e-06,
79
+ "loss": 3.721,
80
  "step": 90
81
  },
82
  {
83
  "epoch": 14.285714285714286,
84
+ "grad_norm": 1.453125,
85
  "learning_rate": 0.0,
86
+ "loss": 3.7058,
 
 
 
 
 
 
 
 
87
  "step": 100
88
  }
89
  ],
 
91
  "max_steps": 100,
92
  "num_input_tokens_seen": 0,
93
  "num_train_epochs": 15,
94
+ "save_steps": 200,
95
  "stateful_callbacks": {
96
  "TrainerControl": {
97
  "args": {
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38c45a65d2feaa5c8b7363d20e9b9067a7c858a6c5c36582490c8414ec321027
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdfa99156b89b16ec5fa2a9dbf1b49622d09344a5aea17d9ad3478c38d45547
3
  size 6840