hajimemat committed on
Commit
b072aa0
·
verified ·
1 Parent(s): 0bc265b

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d98e97b81b6c1fe48cad320f2a890e7e61e83dfff7d26d334198e8a74f852b74
3
  size 194563400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ddf70e3b717c4942eb29e5cb233383ecd1afc20c58cc985636fa0e06d5b136
3
  size 194563400
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d6595f37c512d7abd41b810217cfc99365ebf48ebe28d646eced18519db33b8
3
  size 99240837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1d2da135690fff7c0e120e786351cf9750b8ea31f819ca37b532af0ea60ef3
3
  size 99240837
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f374b74741f6041ab3fbc6fa2f1ad297bd742776ae9f7be70ea73be713dbbd9f
3
- size 14581
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11ed94202eb6e3eaeb8f032cfd9fd46e9b4657a59638d69479f047c360d252a9
3
+ size 14709
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ef0a7b786a9a7cce22464cb50d85b3bb30f4b7314d78a4eeb6d98db65c58909
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f3a67d47428d0f6084a0c2e68b54a6b89dcabf900532b585b519c3b42aa7fc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2382015780854548,
5
  "eval_steps": 500,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -77,6 +77,76 @@
77
  "learning_rate": 8.90915741234015e-05,
78
  "loss": 1.0931,
79
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }
81
  ],
82
  "logging_steps": 10,
@@ -96,7 +166,7 @@
96
  "attributes": {}
97
  }
98
  },
99
- "total_flos": 8.45623326670848e+16,
100
  "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4764031561709096,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
77
  "learning_rate": 8.90915741234015e-05,
78
  "loss": 1.0931,
79
  "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.2620217358940003,
83
+ "grad_norm": 0.09928414970636368,
84
+ "learning_rate": 8.656475314362148e-05,
85
+ "loss": 1.0993,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.28584189370254576,
90
+ "grad_norm": 0.1041741743683815,
91
+ "learning_rate": 8.3819108836604e-05,
92
+ "loss": 1.0937,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.3096620515110913,
97
+ "grad_norm": 0.10851814597845078,
98
+ "learning_rate": 8.087107262799855e-05,
99
+ "loss": 1.0663,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.33348220931963674,
104
+ "grad_norm": 0.10271850228309631,
105
+ "learning_rate": 7.773828716643591e-05,
106
+ "loss": 1.0555,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.3573023671281822,
111
+ "grad_norm": 0.11661435663700104,
112
+ "learning_rate": 7.443950074034368e-05,
113
+ "loss": 1.0421,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.3811225249367277,
118
+ "grad_norm": 0.1197165921330452,
119
+ "learning_rate": 7.099445507801323e-05,
120
+ "loss": 1.0214,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.4049426827452732,
125
+ "grad_norm": 0.1095174252986908,
126
+ "learning_rate": 6.742376720238346e-05,
127
+ "loss": 1.0386,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.4287628405538187,
132
+ "grad_norm": 0.10334830731153488,
133
+ "learning_rate": 6.374880604758615e-05,
134
+ "loss": 1.0204,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.45258299836236415,
139
+ "grad_norm": 0.10270854085683823,
140
+ "learning_rate": 5.9991564575646855e-05,
141
+ "loss": 1.0138,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.4764031561709096,
146
+ "grad_norm": 0.10575564205646515,
147
+ "learning_rate": 5.6174528158664096e-05,
148
+ "loss": 1.0359,
149
+ "step": 200
150
  }
151
  ],
152
  "logging_steps": 10,
 
166
  "attributes": {}
167
  }
168
  },
169
+ "total_flos": 1.691246653341696e+17,
170
  "train_batch_size": 2,
171
  "trial_name": null,
172
  "trial_params": null