Dohahemdann committed on
Commit
03ed2b1
·
verified ·
1 Parent(s): 8db7267

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b977ee477885a307d29621bacd3fc0e940328cfdaacfb6d47689a78a63745e
3
  size 75579856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fb75a4a36553b73994dbd2353c33c1160b78c14db20d5475338392d9c4157d
3
  size 75579856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df69cf2532df815e8d4be378229c606c5c64b77332e5edd954d162369652f8a1
3
  size 39015574
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f97ab80c0e9ed284bc1431c9250044bcd13f5ab7aee843eb60252e95c35c049
3
  size 39015574
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77653f901a737c32c712d4d98c59bda07889be56e9af169aa9525194aa1f203a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b432237b00d1d90d6724248db250f29e52844d78501ed7ee56e7727e8a88c36f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b240dd9ddd423073bd07780b2cb840e4271f59c698cb81fac8efa021107c82d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a625473bb0a513e1727ebc85c0930216deed7d5ec1e4fd299b7895bbd369bcc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.006916825177243645,
6
  "eval_steps": 500,
7
- "global_step": 10,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -78,6 +78,76 @@
78
  "learning_rate": 1.2328767123287671e-05,
79
  "loss": 2.86,
80
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
82
  ],
83
  "logging_steps": 1,
@@ -97,7 +167,7 @@
97
  "attributes": {}
98
  }
99
  },
100
- "total_flos": 347472609361920.0,
101
  "train_batch_size": 2,
102
  "trial_name": null,
103
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.01383365035448729,
6
  "eval_steps": 500,
7
+ "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
78
  "learning_rate": 1.2328767123287671e-05,
79
  "loss": 2.86,
80
  "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.00760850769496801,
84
+ "grad_norm": 0.3276166021823883,
85
+ "learning_rate": 1.3698630136986302e-05,
86
+ "loss": 2.5563,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.008300190212692375,
91
+ "grad_norm": 0.6293095350265503,
92
+ "learning_rate": 1.5068493150684931e-05,
93
+ "loss": 1.6394,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.008991872730416739,
98
+ "grad_norm": 0.3222309648990631,
99
+ "learning_rate": 1.643835616438356e-05,
100
+ "loss": 1.8561,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.009683555248141103,
105
+ "grad_norm": 0.2540973126888275,
106
+ "learning_rate": 1.780821917808219e-05,
107
+ "loss": 3.0327,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.010375237765865467,
112
+ "grad_norm": 0.2383764237165451,
113
+ "learning_rate": 1.9178082191780822e-05,
114
+ "loss": 2.2992,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.011066920283589833,
119
+ "grad_norm": 0.35212621092796326,
120
+ "learning_rate": 2.0547945205479453e-05,
121
+ "loss": 2.8462,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.011758602801314197,
126
+ "grad_norm": 0.23731686174869537,
127
+ "learning_rate": 2.1917808219178083e-05,
128
+ "loss": 2.8213,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.012450285319038561,
133
+ "grad_norm": 0.3209351599216461,
134
+ "learning_rate": 2.328767123287671e-05,
135
+ "loss": 2.4589,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.013141967836762926,
140
+ "grad_norm": 0.22553832828998566,
141
+ "learning_rate": 2.4657534246575342e-05,
142
+ "loss": 2.358,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.01383365035448729,
147
+ "grad_norm": 0.29168522357940674,
148
+ "learning_rate": 2.6027397260273973e-05,
149
+ "loss": 2.5836,
150
+ "step": 20
151
  }
152
  ],
153
  "logging_steps": 1,
 
167
  "attributes": {}
168
  }
169
  },
170
+ "total_flos": 673991251476480.0,
171
  "train_batch_size": 2,
172
  "trial_name": null,
173
  "trial_params": null