Erland committed on
Commit
39faa2d
·
verified ·
1 Parent(s): 12ff6a0

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:961f35029677070ede9234dcd4cb050b4d0e492a911978fedb6dffe8847b8d97
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388de8b1fd2d9b827937b3a78d2f0c4f9e28ffa087987c50cf8c48941c605e27
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb70bad3451bf3a8d2b698f9922fefa5f55f13dcc960b77e5e77adce9eefe424
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37278b058da42c070ece918b5474e74f3a0c469f409efa5127d07d2cc5355219
3
  size 85723284
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:276ca034010af89433fdc86003e552246afaf008d6bdfe28c94401b0f98e39e6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcc550cfbb47ad4cbc37125ea640e4b6df0c324dad2c713e9b18c9c4eb2ecb33
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0008,
5
  "eval_steps": 500,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -77,6 +77,76 @@
77
  "learning_rate": 0.00019487179487179487,
78
  "loss": 0.3175,
79
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }
81
  ],
82
  "logging_steps": 1,
@@ -96,7 +166,7 @@
96
  "attributes": {}
97
  }
98
  },
99
- "total_flos": 2489379596009472.0,
100
  "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0016,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
77
  "learning_rate": 0.00019487179487179487,
78
  "loss": 0.3175,
79
  "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.00088,
83
+ "grad_norm": 0.5333597660064697,
84
+ "learning_rate": 0.00019384615384615385,
85
+ "loss": 0.5604,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 0.00096,
90
+ "grad_norm": 0.49285152554512024,
91
+ "learning_rate": 0.00019282051282051282,
92
+ "loss": 0.6799,
93
+ "step": 12
94
+ },
95
+ {
96
+ "epoch": 0.00104,
97
+ "grad_norm": 0.5650416016578674,
98
+ "learning_rate": 0.00019179487179487182,
99
+ "loss": 0.3743,
100
+ "step": 13
101
+ },
102
+ {
103
+ "epoch": 0.00112,
104
+ "grad_norm": 0.3586512506008148,
105
+ "learning_rate": 0.0001907692307692308,
106
+ "loss": 0.3659,
107
+ "step": 14
108
+ },
109
+ {
110
+ "epoch": 0.0012,
111
+ "grad_norm": 0.4746282398700714,
112
+ "learning_rate": 0.00018974358974358974,
113
+ "loss": 0.511,
114
+ "step": 15
115
+ },
116
+ {
117
+ "epoch": 0.00128,
118
+ "grad_norm": 0.5259126424789429,
119
+ "learning_rate": 0.0001887179487179487,
120
+ "loss": 0.2649,
121
+ "step": 16
122
+ },
123
+ {
124
+ "epoch": 0.00136,
125
+ "grad_norm": 0.5694918036460876,
126
+ "learning_rate": 0.0001876923076923077,
127
+ "loss": 0.5794,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 0.00144,
132
+ "grad_norm": 0.6411933898925781,
133
+ "learning_rate": 0.0001866666666666667,
134
+ "loss": 0.356,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 0.00152,
139
+ "grad_norm": 0.4190411865711212,
140
+ "learning_rate": 0.00018564102564102566,
141
+ "loss": 0.4555,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 0.0016,
146
+ "grad_norm": 0.48357734084129333,
147
+ "learning_rate": 0.00018461538461538463,
148
+ "loss": 0.4766,
149
+ "step": 20
150
  }
151
  ],
152
  "logging_steps": 1,
 
166
  "attributes": {}
167
  }
168
  },
169
+ "total_flos": 5377867744100352.0,
170
  "train_batch_size": 2,
171
  "trial_name": null,
172
  "trial_params": null