Training in progress, step 20, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8668296
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:548783adf4d23308302c202bf40a281da4ae1dc20aab5f9f2f2e8577f67caa46
|
| 3 |
size 8668296
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 17405562
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5b361053329544d4e8b662a787c9b8569aa3deec709e1cb498607fd498cf2a0
|
| 3 |
size 17405562
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2da4182a3b74e8a27beef3c980202435a704abfb52c6bca6860053ff8533dede
|
| 3 |
size 13990
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bf70f0f705c641ac0aa6196d80fdac69cf053952800eb6a8b0a95b3368eb426
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,15 +1,16 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric":
|
| 3 |
-
"best_model_checkpoint": "./exp/wft-test-model/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 10,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.01,
|
|
|
|
| 13 |
"grad_norm": 32.254364013671875,
|
| 14 |
"learning_rate": 1e-05,
|
| 15 |
"loss": 4.3127,
|
|
@@ -17,6 +18,7 @@
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.02,
|
|
|
|
| 20 |
"grad_norm": 13.570059776306152,
|
| 21 |
"learning_rate": 2e-05,
|
| 22 |
"loss": 2.7251,
|
|
@@ -24,6 +26,7 @@
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.03,
|
|
|
|
| 27 |
"grad_norm": 27.845048904418945,
|
| 28 |
"learning_rate": 3e-05,
|
| 29 |
"loss": 3.8856,
|
|
@@ -31,6 +34,7 @@
|
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.04,
|
|
|
|
| 34 |
"grad_norm": 18.186725616455078,
|
| 35 |
"learning_rate": 4e-05,
|
| 36 |
"loss": 2.5123,
|
|
@@ -38,6 +42,7 @@
|
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.05,
|
|
|
|
| 41 |
"grad_norm": 20.293132781982422,
|
| 42 |
"learning_rate": 5e-05,
|
| 43 |
"loss": 2.9692,
|
|
@@ -45,6 +50,7 @@
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.06,
|
|
|
|
| 48 |
"grad_norm": 20.205909729003906,
|
| 49 |
"learning_rate": 6e-05,
|
| 50 |
"loss": 3.336,
|
|
@@ -52,6 +58,7 @@
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.07,
|
|
|
|
| 55 |
"grad_norm": 14.420604705810547,
|
| 56 |
"learning_rate": 7.000000000000001e-05,
|
| 57 |
"loss": 2.9546,
|
|
@@ -59,6 +66,7 @@
|
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 0.08,
|
|
|
|
| 62 |
"grad_norm": 5.636794090270996,
|
| 63 |
"learning_rate": 8e-05,
|
| 64 |
"loss": 2.107,
|
|
@@ -66,6 +74,7 @@
|
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"epoch": 0.09,
|
|
|
|
| 69 |
"grad_norm": 16.66157341003418,
|
| 70 |
"learning_rate": 8.999999999999999e-05,
|
| 71 |
"loss": 2.8062,
|
|
@@ -73,6 +82,7 @@
|
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"epoch": 0.1,
|
|
|
|
| 76 |
"grad_norm": 8.142745971679688,
|
| 77 |
"learning_rate": 0.0001,
|
| 78 |
"loss": 2.4079,
|
|
@@ -91,6 +101,90 @@
|
|
| 91 |
"eval_wer": 312.2047244094488,
|
| 92 |
"eval_wer_time": 0.016916990280151367,
|
| 93 |
"step": 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
}
|
| 95 |
],
|
| 96 |
"logging_steps": 1,
|
|
@@ -110,7 +204,7 @@
|
|
| 110 |
"attributes": {}
|
| 111 |
}
|
| 112 |
},
|
| 113 |
-
"total_flos":
|
| 114 |
"train_batch_size": 4,
|
| 115 |
"trial_name": null,
|
| 116 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 258.26771653543307,
|
| 3 |
+
"best_model_checkpoint": "./exp/wft-test-model/checkpoint-20",
|
| 4 |
+
"epoch": 1.01,
|
| 5 |
"eval_steps": 10,
|
| 6 |
+
"global_step": 20,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.01,
|
| 13 |
+
"eval_pred": null,
|
| 14 |
"grad_norm": 32.254364013671875,
|
| 15 |
"learning_rate": 1e-05,
|
| 16 |
"loss": 4.3127,
|
|
|
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.02,
|
| 21 |
+
"eval_pred": null,
|
| 22 |
"grad_norm": 13.570059776306152,
|
| 23 |
"learning_rate": 2e-05,
|
| 24 |
"loss": 2.7251,
|
|
|
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"epoch": 0.03,
|
| 29 |
+
"eval_pred": null,
|
| 30 |
"grad_norm": 27.845048904418945,
|
| 31 |
"learning_rate": 3e-05,
|
| 32 |
"loss": 3.8856,
|
|
|
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"epoch": 0.04,
|
| 37 |
+
"eval_pred": null,
|
| 38 |
"grad_norm": 18.186725616455078,
|
| 39 |
"learning_rate": 4e-05,
|
| 40 |
"loss": 2.5123,
|
|
|
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"epoch": 0.05,
|
| 45 |
+
"eval_pred": null,
|
| 46 |
"grad_norm": 20.293132781982422,
|
| 47 |
"learning_rate": 5e-05,
|
| 48 |
"loss": 2.9692,
|
|
|
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"epoch": 0.06,
|
| 53 |
+
"eval_pred": null,
|
| 54 |
"grad_norm": 20.205909729003906,
|
| 55 |
"learning_rate": 6e-05,
|
| 56 |
"loss": 3.336,
|
|
|
|
| 58 |
},
|
| 59 |
{
|
| 60 |
"epoch": 0.07,
|
| 61 |
+
"eval_pred": null,
|
| 62 |
"grad_norm": 14.420604705810547,
|
| 63 |
"learning_rate": 7.000000000000001e-05,
|
| 64 |
"loss": 2.9546,
|
|
|
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"epoch": 0.08,
|
| 69 |
+
"eval_pred": null,
|
| 70 |
"grad_norm": 5.636794090270996,
|
| 71 |
"learning_rate": 8e-05,
|
| 72 |
"loss": 2.107,
|
|
|
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"epoch": 0.09,
|
| 77 |
+
"eval_pred": null,
|
| 78 |
"grad_norm": 16.66157341003418,
|
| 79 |
"learning_rate": 8.999999999999999e-05,
|
| 80 |
"loss": 2.8062,
|
|
|
|
| 82 |
},
|
| 83 |
{
|
| 84 |
"epoch": 0.1,
|
| 85 |
+
"eval_pred": null,
|
| 86 |
"grad_norm": 8.142745971679688,
|
| 87 |
"learning_rate": 0.0001,
|
| 88 |
"loss": 2.4079,
|
|
|
|
| 101 |
"eval_wer": 312.2047244094488,
|
| 102 |
"eval_wer_time": 0.016916990280151367,
|
| 103 |
"step": 10
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"epoch": 0.11,
|
| 107 |
+
"grad_norm": 4.166449546813965,
|
| 108 |
+
"learning_rate": 0.00011,
|
| 109 |
+
"loss": 2.0676,
|
| 110 |
+
"step": 11
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"epoch": 0.12,
|
| 114 |
+
"grad_norm": 17.860973358154297,
|
| 115 |
+
"learning_rate": 0.00012,
|
| 116 |
+
"loss": 2.3138,
|
| 117 |
+
"step": 12
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"epoch": 0.13,
|
| 121 |
+
"grad_norm": 6.342301368713379,
|
| 122 |
+
"learning_rate": 0.00013000000000000002,
|
| 123 |
+
"loss": 2.0556,
|
| 124 |
+
"step": 13
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"epoch": 0.14,
|
| 128 |
+
"grad_norm": 8.040596961975098,
|
| 129 |
+
"learning_rate": 0.00014000000000000001,
|
| 130 |
+
"loss": 1.9082,
|
| 131 |
+
"step": 14
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"epoch": 0.15,
|
| 135 |
+
"grad_norm": 4.461116790771484,
|
| 136 |
+
"learning_rate": 0.00015,
|
| 137 |
+
"loss": 2.1481,
|
| 138 |
+
"step": 15
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 0.16,
|
| 142 |
+
"grad_norm": 4.888762950897217,
|
| 143 |
+
"learning_rate": 0.00016,
|
| 144 |
+
"loss": 2.1165,
|
| 145 |
+
"step": 16
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.17,
|
| 149 |
+
"grad_norm": 19.042604446411133,
|
| 150 |
+
"learning_rate": 0.00017,
|
| 151 |
+
"loss": 2.4252,
|
| 152 |
+
"step": 17
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"epoch": 0.18,
|
| 156 |
+
"grad_norm": 5.278524398803711,
|
| 157 |
+
"learning_rate": 0.00017999999999999998,
|
| 158 |
+
"loss": 1.7136,
|
| 159 |
+
"step": 18
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"epoch": 0.19,
|
| 163 |
+
"grad_norm": 10.18915843963623,
|
| 164 |
+
"learning_rate": 0.00019,
|
| 165 |
+
"loss": 2.0686,
|
| 166 |
+
"step": 19
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"epoch": 1.01,
|
| 170 |
+
"grad_norm": 3.584944486618042,
|
| 171 |
+
"learning_rate": 0.0002,
|
| 172 |
+
"loss": 1.2303,
|
| 173 |
+
"step": 20
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 1.01,
|
| 177 |
+
"eval_cer": 100.0,
|
| 178 |
+
"eval_cer_time": 0.004556179046630859,
|
| 179 |
+
"eval_decode_time": 0.5213298797607422,
|
| 180 |
+
"eval_loss": 1.1646130084991455,
|
| 181 |
+
"eval_pred": "| i | Label | Prediction |\n| --- | --- | --- |\n| 0 | MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL | MristerTER QUILTER IS THE APOSAL OF THE MDLE CLASSES AND WE'RE GLAD TO WELLCOME HIS GOSPLETHTHTH) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\"-\"-\"- -\"-\"- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |\n| 1 | NOR IS MISTER QUILTER'S MANNER LESS INTERESTING THAN HIS MATTER | NorORIS MrISTER QUILTERSS MANER LESS INTERESTING THEN HIS MATTER - - - - - - - - - - - - - - -) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |\n| 2 | HE TELLS US THAT AT THIS FESTIVE SEASON OF THE YEAR WITH CHRISTMAS AND ROAST BEEF LOOMING BEFORE US SIMILES DRAWN FROM EATING AND ITS RESULTS OCCUR MOST READILY TO THE MIND | He TELLS US THAT AT THIS FESTTIVE SEASON OF THE YEAR WITH CHRISTMAS AND RAST BEEEF LUMING BEFORE US SIMILA DRAWN FROM EATING AND IT'S RESULTS ORPERUR MOST READITLY TO THE MIND - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\"- - - - -\"-\"-\"-\"- |\n| 3 | HE HAS GRAVE DOUBTS WHETHER SIR FREDERICK LEIGHTON'S WORK IS REALLY GREEK AFTER ALL AND CAN DISCOVER IN IT BUT LITTLE OF ROCKY ITHACA | He HAS GB DOTSTS WHETHER SURIRFDG LITTON'S'S WORK IS REALLY GREK AFTER ALL AND CAN DISCOVER IN IT BUT LITTLE OF WCKY ISHAKA IS IS IS IS IS IS IS IS IS IS+ - IS ---\"-\"-\"-\"-\"-\"- IS IS IS\"-\"-\"-++\"-\"-+++\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"- |\n| 4 | LINNELL'S PICTURES ARE A SORT OF UP GUARDS AND AT EM PAINTINGS AND MASON'S EXQUISITE IDYLLS ARE AS NATIONAL AS A JINGO POEM MISTER BIRKET FOSTER'S LANDSCAPES SMILE AT ONE MUCH IN THE SAME WAY THAT MISTER CARKER USED TO FLASH HIS TEETH AND MISTER JOHN COLLIER GIVES HIS SITTER A CHEERFUL SLAP ON THE BACK BEFORE HE SAYS LIKE A SHAMPOOER IN A TURKISH BATH NEXT MAN | LininILILSS PICTURES ARE AS SOR OF UPGARDS AN ADOM PINING AND MASSS EXQUISIT ITELLE'S ARE AS NIONAL AS A JINGO PEM MrISTER BRIRK AT FOSTERSS LANDSAPES SMYLE AT ONE MUCH IN THE SAME WAY THAT MISTER CARCER USED TO FASH HIS TETH AND MISTER JO COLLIER GIIVES HIS CIDTER A CHURFUL SLAP IN THE BACK BEFORE HE SAYS LIKE A SHAMPURER IN A TURKISH BATH NEXT MAN |\n| 5 | IT IS OBVIOUSLY UNNECESSARY FOR US TO POINT OUT HOW LUMINOUS THESE CRITICISMS ARE HOW DELICATE IN EXPRESSION | It IS OBVISUSLY UNNESSESSARY FOR US TO POINT OUT HOW LUMINUS THESE CRITICISMS ARE HOW DEELICATE IN EXPRESSION IS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - - - - - -- - - - - - - - -- - - - - - - - - -- |\n| 6 | ON THE GENERAL PRINCIPLES OF ART MISTER QUILTER WRITES WITH EQUAL LUCIDITY | On THE JENRAL PRINCEIPAL OF ART ANDISTER QUILTER RIGHTITEES WITH EQUIL LOUCIDITY - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |\n| 7 | PAINTING HE TELLS US IS OF A DIFFERENT QUALITY TO MATHEMATICS AND FINISH IN ART IS ADDING MORE FACT | PainINING HE TELLS US IS OF A DFFERENT QUALITY TO MHAMADICS AND FINISH IN ART IS ADDING MORE FACT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |\n| 8 | AS FOR ETCHINGS THEY ARE OF TWO KINDS BRITISH AND FOREIGN | As FOR ECHINGS TH ARE OF TWO KINDES BRIDISH AND FOREIGN)) IS - - - - - - - - - - - - - - - -)) - - - -) - - - - - - - - - - - - - - - - - - - - - - - - - - - - |\n| 9 | HE LAMENTS MOST BITTERLY THE DIVORCE THAT HAS BEEN MADE BETWEEN DECORATIVE ART AND WHAT WE USUALLY CALL PICTURES MAKES THE CUSTOMARY APPEAL TO THE LAST JUDGMENT AND REMINDS US THAT IN THE GREAT DAYS OF ART MICHAEL ANGELO WAS THE FURNISHING UPHOLSTERER | He LAMEN MOST BITTERLY THE DEIVORCE THAT HAS BEEN MADE BEWEEN DEECREDIVE ART AND WHAT WE USEALLY CALL PICTURES MAKES A CUSTOMER APPEALED THE LAST JGENT AND REMINDES US THAT IN THE GREAT DAYS OF ART MAGELO WAS THE FRINISHING APHOLSTTERER |\n",
|
| 182 |
+
"eval_runtime": 2.7415,
|
| 183 |
+
"eval_samples_per_second": 3.648,
|
| 184 |
+
"eval_steps_per_second": 0.73,
|
| 185 |
+
"eval_wer": 258.26771653543307,
|
| 186 |
+
"eval_wer_time": 1.4057037830352783,
|
| 187 |
+
"step": 20
|
| 188 |
}
|
| 189 |
],
|
| 190 |
"logging_steps": 1,
|
|
|
|
| 204 |
"attributes": {}
|
| 205 |
}
|
| 206 |
},
|
| 207 |
+
"total_flos": 2135452999680000.0,
|
| 208 |
"train_batch_size": 4,
|
| 209 |
"trial_name": null,
|
| 210 |
"trial_params": null
|