ljcamargo commited on
Commit
da10c6b
·
verified ·
1 Parent(s): 4ac7221

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6da720ded8a1790196006900a33f5feace3793698528740ce7024e10c4401eb5
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd1607970824f8435dff0ec490e2197f145ee38b543dfac0f375baae91f3e84
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e57022989b1cc7d3b10c99234ae80d8c4ad9cac35357f91704bca82ec26be329
3
  size 2479122661
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05aa8a7bf492a3ad20e0d6edfff8c0717a4f0bbc1219a587cbf095503cf2d00e
3
  size 2479122661
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b73d4cd42a94a1103ceefa33eaa6ef4fb3cf4f32efbe8707ea89780e8b5d2e9
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b7290d8ca07041eee19d5fa227aba688d13ea17f7d6f3c0e4a7903d483d295
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6d58568e157b4e33c18eb009b274df7561c47b6a7f0984ca3989b2a6bc67549
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ed95effe7569c75627b601fa080ba53727e518015156dc63042342eab93ca8
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.04,
6
  "eval_steps": 500,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -78,6 +78,76 @@
78
  "learning_rate": 4.8176352705410824e-05,
79
  "loss": 1.3992,
80
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
82
  ],
83
  "logging_steps": 10,
@@ -97,7 +167,7 @@
97
  "attributes": {}
98
  }
99
  },
100
- "total_flos": 1819402434478080.0,
101
  "train_batch_size": 2,
102
  "trial_name": null,
103
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
78
  "learning_rate": 4.8176352705410824e-05,
79
  "loss": 1.3992,
80
  "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.044,
84
+ "grad_norm": 42.20634841918945,
85
+ "learning_rate": 4.797595190380762e-05,
86
+ "loss": 1.6443,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.048,
91
+ "grad_norm": 24.657821655273438,
92
+ "learning_rate": 4.7775551102204415e-05,
93
+ "loss": 1.8471,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.052,
98
+ "grad_norm": 38.727420806884766,
99
+ "learning_rate": 4.7575150300601207e-05,
100
+ "loss": 1.5293,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.056,
105
+ "grad_norm": 31.97869873046875,
106
+ "learning_rate": 4.7374749498998e-05,
107
+ "loss": 1.6212,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.06,
112
+ "grad_norm": 31.056962966918945,
113
+ "learning_rate": 4.717434869739479e-05,
114
+ "loss": 1.4407,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.064,
119
+ "grad_norm": 29.63347053527832,
120
+ "learning_rate": 4.697394789579159e-05,
121
+ "loss": 1.1833,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.068,
126
+ "grad_norm": 44.844268798828125,
127
+ "learning_rate": 4.677354709418838e-05,
128
+ "loss": 1.5756,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.072,
133
+ "grad_norm": 31.4070987701416,
134
+ "learning_rate": 4.657314629258517e-05,
135
+ "loss": 1.4358,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.076,
140
+ "grad_norm": 26.982776641845703,
141
+ "learning_rate": 4.6372745490981964e-05,
142
+ "loss": 1.234,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.08,
147
+ "grad_norm": 19.802730560302734,
148
+ "learning_rate": 4.617234468937876e-05,
149
+ "loss": 1.3504,
150
+ "step": 200
151
  }
152
  ],
153
  "logging_steps": 10,
 
167
  "attributes": {}
168
  }
169
  },
170
+ "total_flos": 3595273633732608.0,
171
  "train_batch_size": 2,
172
  "trial_name": null,
173
  "trial_params": null