lamnt2008 committed on
Commit
3b45d92
·
1 Parent(s): e1fd054

Upload 8 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +28 -88
  6. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c169775c366010e4c5eaffadb34df1ccf4d9628fd6397bae1c7164e585adaf43
3
  size 686681861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b2b4d3e8b3ca0710d7da08763dd08c9fc1b6c9025b455de35158b306e28224
3
  size 686681861
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93ab5ed813a4061484bf42a72dcb86ac4bd68e8e621bd367d2d73de57acf6916
3
  size 347081849
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ebed1c9c75137b486f7f2940de8644ac13858c1d6a8d3695a6a51065b153c29
3
  size 347081849
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c800d296d274356dc7e1009bd7b9538d79376fb6a4db1bb2d6965f7da8688b3b
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066b0a5d1c6a4b9d7d21cbb83f91a0857780e54f0c0d79887ffbc027746303b7
3
  size 14639
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b91afddbb850d225c9270f7a06401619e44b4668e24d6085fb2561753e178a88
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0539d160c7887cdd2d15e84b70d756c48293cc6e18cf2a2dcdbf254b4cb7de
3
  size 627
trainer_state.json CHANGED
@@ -1,118 +1,58 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.763363091874788,
5
- "global_step": 51000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.34,
12
- "learning_rate": 9.915244660413606e-06,
13
- "loss": 3.3726,
14
- "step": 3000
15
- },
16
  {
17
  "epoch": 0.68,
18
- "learning_rate": 9.830489320827214e-06,
19
- "loss": 2.1283,
20
- "step": 6000
21
- },
22
- {
23
- "epoch": 1.02,
24
- "learning_rate": 9.745733981240819e-06,
25
- "loss": 1.8301,
26
- "step": 9000
27
  },
28
  {
29
  "epoch": 1.36,
30
- "learning_rate": 9.660978641654425e-06,
31
- "loss": 1.6611,
32
- "step": 12000
33
- },
34
- {
35
- "epoch": 1.7,
36
- "learning_rate": 9.57622330206803e-06,
37
- "loss": 1.6012,
38
- "step": 15000
39
  },
40
  {
41
  "epoch": 2.03,
42
- "learning_rate": 9.491467962481637e-06,
43
- "loss": 1.5255,
44
- "step": 18000
45
- },
46
- {
47
- "epoch": 2.37,
48
- "learning_rate": 9.406712622895243e-06,
49
- "loss": 1.4598,
50
- "step": 21000
51
  },
52
  {
53
- "epoch": 2.71,
54
- "learning_rate": 9.321957283308849e-06,
55
- "loss": 1.4156,
56
- "step": 24000
57
  },
58
  {
59
- "epoch": 3.05,
60
- "learning_rate": 9.237201943722456e-06,
61
- "loss": 1.3908,
62
- "step": 27000
63
  },
64
  {
65
- "epoch": 3.39,
66
- "learning_rate": 9.152446604136061e-06,
67
- "loss": 1.3406,
68
  "step": 30000
69
  },
70
  {
71
- "epoch": 3.73,
72
- "learning_rate": 9.067691264549667e-06,
73
- "loss": 1.3277,
74
- "step": 33000
75
- },
76
- {
77
- "epoch": 4.07,
78
- "learning_rate": 8.982935924963272e-06,
79
- "loss": 1.3095,
80
- "step": 36000
81
- },
82
- {
83
- "epoch": 4.41,
84
- "learning_rate": 8.89818058537688e-06,
85
- "loss": 1.253,
86
- "step": 39000
87
- },
88
- {
89
- "epoch": 4.75,
90
- "learning_rate": 8.813425245790485e-06,
91
- "loss": 1.2553,
92
- "step": 42000
93
- },
94
- {
95
- "epoch": 5.09,
96
- "learning_rate": 8.72866990620409e-06,
97
- "loss": 1.2177,
98
- "step": 45000
99
- },
100
- {
101
- "epoch": 5.42,
102
- "learning_rate": 8.643914566617696e-06,
103
- "loss": 1.198,
104
- "step": 48000
105
- },
106
- {
107
- "epoch": 5.76,
108
- "learning_rate": 8.559159227031304e-06,
109
- "loss": 1.2163,
110
- "step": 51000
111
  }
112
  ],
113
- "max_steps": 353960,
114
  "num_train_epochs": 40,
115
- "total_flos": 3.162362109935616e+19,
116
  "trial_name": null,
117
  "trial_params": null
118
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.03954802259887,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 0.68,
12
+ "learning_rate": 9.830508474576272e-06,
13
+ "loss": 3.108,
14
+ "step": 3000
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 1.36,
18
+ "learning_rate": 9.661016949152544e-06,
19
+ "loss": 2.0197,
20
+ "step": 6000
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 2.03,
24
+ "learning_rate": 9.491525423728815e-06,
25
+ "loss": 1.6483,
26
+ "step": 9000
 
 
 
 
 
 
27
  },
28
  {
29
+ "epoch": 2.26,
30
+ "learning_rate": 9.435028248587572e-06,
31
+ "loss": 1.4748,
32
+ "step": 10000
33
  },
34
  {
35
+ "epoch": 4.52,
36
+ "learning_rate": 8.870056497175143e-06,
37
+ "loss": 1.2986,
38
+ "step": 20000
39
  },
40
  {
41
+ "epoch": 6.78,
42
+ "learning_rate": 8.305084745762712e-06,
43
+ "loss": 1.074,
44
  "step": 30000
45
  },
46
  {
47
+ "epoch": 9.04,
48
+ "learning_rate": 7.740112994350283e-06,
49
+ "loss": 0.9305,
50
+ "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
  ],
53
+ "max_steps": 177000,
54
  "num_train_epochs": 40,
55
+ "total_flos": 4.960009951683527e+19,
56
  "trial_name": null,
57
  "trial_params": null
58
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ae9bc04ec11a5436f1c2b5d27a074129d73b2e6339507f9d2f4304a8062bf8
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d951a2f5f0efbcf1dae67bc7db374d5cf8dc574b8d82c1b01a1d9990a79c623e
3
  size 3643