jinchang1223 committed on
Commit
b8977b6
·
verified ·
1 Parent(s): b21ca55

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. trainer_state.json +34 -34
  5. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2c85a787e7a40a6739570825d5dac5d0da5dd949c5d42eed6ecb390485c0387
3
  size 2384234968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad376b0758013829099b368873eb5fd6337f67cf5e848a25b2b474b7983d6801
3
  size 2384234968
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac84b9582ff7b04d97c3564b5a701e65ff92f062e806c2f383748618a2ede63d
3
  size 4768662910
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a42c8ce6ec1d87a350b8754186e37b669ffc453a2365356e778b02dc08863a60
3
  size 4768662910
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49b78bc9f0d7e56e66d9414db603a96557a219cd78b5a76e7ebc4ea10e197295
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435
3
  size 14244
trainer_state.json CHANGED
@@ -11,109 +11,109 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.14814814814814814,
14
- "grad_norm": 5.238080978393555,
15
  "learning_rate": 3.96e-06,
16
- "loss": 1.4747,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.2962962962962963,
21
- "grad_norm": 5.111464023590088,
22
  "learning_rate": 7.960000000000002e-06,
23
- "loss": 1.2339,
24
  "step": 200
25
  },
26
  {
27
  "epoch": 0.4444444444444444,
28
- "grad_norm": 4.6231770515441895,
29
  "learning_rate": 1.196e-05,
30
- "loss": 1.3136,
31
  "step": 300
32
  },
33
  {
34
  "epoch": 0.5925925925925926,
35
- "grad_norm": 4.638155460357666,
36
  "learning_rate": 1.5960000000000003e-05,
37
- "loss": 1.3179,
38
  "step": 400
39
  },
40
  {
41
  "epoch": 0.7407407407407407,
42
- "grad_norm": 4.396394729614258,
43
  "learning_rate": 1.9960000000000002e-05,
44
- "loss": 1.3025,
45
  "step": 500
46
  },
47
  {
48
  "epoch": 0.8888888888888888,
49
- "grad_norm": 2.8801891803741455,
50
  "learning_rate": 1.87016393442623e-05,
51
- "loss": 1.2734,
52
  "step": 600
53
  },
54
  {
55
  "epoch": 1.0,
56
- "eval_loss": 1.3948965072631836,
57
- "eval_runtime": 179.2505,
58
- "eval_samples_per_second": 3.347,
59
- "eval_steps_per_second": 0.418,
60
  "step": 675
61
  },
62
  {
63
  "epoch": 1.037037037037037,
64
- "grad_norm": 4.732747554779053,
65
  "learning_rate": 1.7390163934426233e-05,
66
- "loss": 1.2474,
67
  "step": 700
68
  },
69
  {
70
  "epoch": 1.1851851851851851,
71
- "grad_norm": 4.509610652923584,
72
  "learning_rate": 1.6078688524590164e-05,
73
- "loss": 1.0051,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 1.3333333333333333,
78
- "grad_norm": 4.83737325668335,
79
  "learning_rate": 1.47672131147541e-05,
80
- "loss": 1.0629,
81
  "step": 900
82
  },
83
  {
84
  "epoch": 1.4814814814814814,
85
- "grad_norm": 5.032131671905518,
86
  "learning_rate": 1.3455737704918036e-05,
87
- "loss": 1.0543,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.6296296296296298,
92
- "grad_norm": 5.264540672302246,
93
  "learning_rate": 1.2144262295081968e-05,
94
- "loss": 1.0929,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.7777777777777777,
99
- "grad_norm": 5.0603742599487305,
100
  "learning_rate": 1.0832786885245903e-05,
101
- "loss": 1.0502,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.925925925925926,
106
- "grad_norm": 5.43220853805542,
107
  "learning_rate": 9.521311475409837e-06,
108
- "loss": 1.0456,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 2.0,
113
- "eval_loss": 1.391052484512329,
114
- "eval_runtime": 179.1632,
115
- "eval_samples_per_second": 3.349,
116
- "eval_steps_per_second": 0.419,
117
  "step": 1350
118
  }
119
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.14814814814814814,
14
+ "grad_norm": 4.711470127105713,
15
  "learning_rate": 3.96e-06,
16
+ "loss": 1.4337,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.2962962962962963,
21
+ "grad_norm": 4.751404285430908,
22
  "learning_rate": 7.960000000000002e-06,
23
+ "loss": 1.2046,
24
  "step": 200
25
  },
26
  {
27
  "epoch": 0.4444444444444444,
28
+ "grad_norm": 4.166985988616943,
29
  "learning_rate": 1.196e-05,
30
+ "loss": 1.2721,
31
  "step": 300
32
  },
33
  {
34
  "epoch": 0.5925925925925926,
35
+ "grad_norm": 4.238899230957031,
36
  "learning_rate": 1.5960000000000003e-05,
37
+ "loss": 1.2809,
38
  "step": 400
39
  },
40
  {
41
  "epoch": 0.7407407407407407,
42
+ "grad_norm": 4.066563606262207,
43
  "learning_rate": 1.9960000000000002e-05,
44
+ "loss": 1.2463,
45
  "step": 500
46
  },
47
  {
48
  "epoch": 0.8888888888888888,
49
+ "grad_norm": 3.029653787612915,
50
  "learning_rate": 1.87016393442623e-05,
51
+ "loss": 1.2347,
52
  "step": 600
53
  },
54
  {
55
  "epoch": 1.0,
56
+ "eval_loss": 1.3202142715454102,
57
+ "eval_runtime": 179.8462,
58
+ "eval_samples_per_second": 3.336,
59
+ "eval_steps_per_second": 0.417,
60
  "step": 675
61
  },
62
  {
63
  "epoch": 1.037037037037037,
64
+ "grad_norm": 3.9755260944366455,
65
  "learning_rate": 1.7390163934426233e-05,
66
+ "loss": 1.2086,
67
  "step": 700
68
  },
69
  {
70
  "epoch": 1.1851851851851851,
71
+ "grad_norm": 4.08528995513916,
72
  "learning_rate": 1.6078688524590164e-05,
73
+ "loss": 0.9801,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 1.3333333333333333,
78
+ "grad_norm": 4.117591381072998,
79
  "learning_rate": 1.47672131147541e-05,
80
+ "loss": 1.0272,
81
  "step": 900
82
  },
83
  {
84
  "epoch": 1.4814814814814814,
85
+ "grad_norm": 4.108471870422363,
86
  "learning_rate": 1.3455737704918036e-05,
87
+ "loss": 1.0307,
88
  "step": 1000
89
  },
90
  {
91
  "epoch": 1.6296296296296298,
92
+ "grad_norm": 4.6597161293029785,
93
  "learning_rate": 1.2144262295081968e-05,
94
+ "loss": 1.0432,
95
  "step": 1100
96
  },
97
  {
98
  "epoch": 1.7777777777777777,
99
+ "grad_norm": 4.704660892486572,
100
  "learning_rate": 1.0832786885245903e-05,
101
+ "loss": 1.0158,
102
  "step": 1200
103
  },
104
  {
105
  "epoch": 1.925925925925926,
106
+ "grad_norm": 5.08992338180542,
107
  "learning_rate": 9.521311475409837e-06,
108
+ "loss": 1.0238,
109
  "step": 1300
110
  },
111
  {
112
  "epoch": 2.0,
113
+ "eval_loss": 1.3168182373046875,
114
+ "eval_runtime": 179.8766,
115
+ "eval_samples_per_second": 3.336,
116
+ "eval_steps_per_second": 0.417,
117
  "step": 1350
118
  }
119
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df73858dfad5311b29b2ac8a196a53ff9d692a10945cc7183c7586a3bd7becb3
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660b7cc6409839c5c5722f4d272744990cfd2568b71e11c72fb763d426f9a7fd
3
  size 5240