lighthousefeed
/

yoda-ner

+2022-10-01 00:23:25,105 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,107 Model: "SequenceTagger(
+  (embeddings): StackedEmbeddings(
+    (list_embedding_0): TransformerWordEmbeddings(
+      (model): BertModel(
+        (embeddings): BertEmbeddings(
+          (word_embeddings): Embedding(119547, 768, padding_idx=0)
+          (position_embeddings): Embedding(512, 768)
+          (token_type_embeddings): Embedding(2, 768)
+          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (encoder): BertEncoder(
+          (layer): ModuleList(
+            (0): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (1): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (2): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (3): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (4): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (5): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (6): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (7): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (8): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (9): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (10): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (11): BertLayer(
+              (attention): BertAttention(
+                (self): BertSelfAttention(
+                  (query): Linear(in_features=768, out_features=768, bias=True)
+                  (key): Linear(in_features=768, out_features=768, bias=True)
+                  (value): Linear(in_features=768, out_features=768, bias=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+                (output): BertSelfOutput(
+                  (dense): Linear(in_features=768, out_features=768, bias=True)
+                  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                  (dropout): Dropout(p=0.1, inplace=False)
+                )
+              )
+              (intermediate): BertIntermediate(
+                (dense): Linear(in_features=768, out_features=3072, bias=True)
+                (intermediate_act_fn): GELUActivation()
+              )
+              (output): BertOutput(
+                (dense): Linear(in_features=3072, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+          )
+        )
+        (pooler): BertPooler(
+          (dense): Linear(in_features=768, out_features=768, bias=True)
+          (activation): Tanh()
+        )
+      )
+    )
+    (list_embedding_1): FlairEmbeddings(
+      (lm): LanguageModel(
+        (drop): Dropout(p=0.5, inplace=False)
+        (encoder): Embedding(275, 100)
+        (rnn): LSTM(100, 1024)
+        (decoder): Linear(in_features=1024, out_features=275, bias=True)
+      )
+    )
+    (list_embedding_2): FlairEmbeddings(
+      (lm): LanguageModel(
+        (drop): Dropout(p=0.5, inplace=False)
+        (encoder): Embedding(275, 100)
+        (rnn): LSTM(100, 1024)
+        (decoder): Linear(in_features=1024, out_features=275, bias=True)
+      )
+    )
+  )
+  (word_dropout): WordDropout(p=0.05)
+  (locked_dropout): LockedDropout(p=0.5)
+  (embedding2nn): Linear(in_features=2816, out_features=2816, bias=True)
+  (linear): Linear(in_features=2816, out_features=13, bias=True)
+  (loss_function): CrossEntropyLoss()
+)"
+2022-10-01 00:23:25,114 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,115 Corpus: "Corpus: 70000 train + 15000 dev + 15000 test sentences"
+2022-10-01 00:23:25,115 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,115 Parameters:
+2022-10-01 00:23:25,116  - learning_rate: "0.010000"
+2022-10-01 00:23:25,116  - mini_batch_size: "8"
+2022-10-01 00:23:25,116  - patience: "3"
+2022-10-01 00:23:25,116  - anneal_factor: "0.5"
+2022-10-01 00:23:25,116  - max_epochs: "2"
+2022-10-01 00:23:25,116  - shuffle: "True"
+2022-10-01 00:23:25,117  - train_with_dev: "False"
+2022-10-01 00:23:25,117  - batch_growth_annealing: "False"
+2022-10-01 00:23:25,117 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,117 Model training base path: "c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word"
+2022-10-01 00:23:25,117 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,118 Device: cuda:0
+2022-10-01 00:23:25,118 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:23:25,118 Embeddings storage mode: cpu
+2022-10-01 00:23:25,119 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:25:10,652 epoch 1 - iter 875/8750 - loss 0.52734710 - samples/sec: 66.36 - lr: 0.010000
+2022-10-01 00:26:56,050 epoch 1 - iter 1750/8750 - loss 0.40571165 - samples/sec: 66.45 - lr: 0.010000
+2022-10-01 00:28:42,758 epoch 1 - iter 2625/8750 - loss 0.33981350 - samples/sec: 65.63 - lr: 0.010000
+2022-10-01 00:30:27,826 epoch 1 - iter 3500/8750 - loss 0.29553411 - samples/sec: 66.66 - lr: 0.010000
+2022-10-01 00:32:13,605 epoch 1 - iter 4375/8750 - loss 0.26472648 - samples/sec: 66.21 - lr: 0.010000
+2022-10-01 00:33:58,962 epoch 1 - iter 5250/8750 - loss 0.24119392 - samples/sec: 66.47 - lr: 0.010000
+2022-10-01 00:35:44,264 epoch 1 - iter 6125/8750 - loss 0.22350560 - samples/sec: 66.50 - lr: 0.010000
+2022-10-01 00:37:29,676 epoch 1 - iter 7000/8750 - loss 0.20938707 - samples/sec: 66.43 - lr: 0.010000
+2022-10-01 00:39:17,828 epoch 1 - iter 7875/8750 - loss 0.19801233 - samples/sec: 64.75 - lr: 0.010000
+2022-10-01 00:41:05,621 epoch 1 - iter 8750/8750 - loss 0.18900810 - samples/sec: 64.98 - lr: 0.010000
+2022-10-01 00:41:05,624 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:41:05,624 EPOCH 1 done: loss 0.1890 - lr 0.010000
+2022-10-01 00:43:16,083 Evaluating as a multi-label problem: False
+2022-10-01 00:43:16,227 DEV : loss 0.06317088007926941 - f1-score (micro avg)  0.9585
+2022-10-01 00:43:17,308 BAD EPOCHS (no improvement): 0
+2022-10-01 00:43:17,309 saving best model
+2022-10-01 00:43:18,885 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:45:00,373 epoch 2 - iter 875/8750 - loss 0.09938527 - samples/sec: 69.02 - lr: 0.010000
+2022-10-01 00:46:39,918 epoch 2 - iter 1750/8750 - loss 0.09782604 - samples/sec: 70.36 - lr: 0.010000
+2022-10-01 00:48:19,288 epoch 2 - iter 2625/8750 - loss 0.09732946 - samples/sec: 70.50 - lr: 0.010000
+2022-10-01 00:49:56,913 epoch 2 - iter 3500/8750 - loss 0.09652202 - samples/sec: 71.76 - lr: 0.010000
+2022-10-01 00:51:35,781 epoch 2 - iter 4375/8750 - loss 0.09592801 - samples/sec: 70.86 - lr: 0.010000
+2022-10-01 00:53:12,838 epoch 2 - iter 5250/8750 - loss 0.09478132 - samples/sec: 72.17 - lr: 0.010000
+2022-10-01 00:54:49,247 epoch 2 - iter 6125/8750 - loss 0.09405506 - samples/sec: 72.65 - lr: 0.010000
+2022-10-01 00:56:26,656 epoch 2 - iter 7000/8750 - loss 0.09270363 - samples/sec: 71.90 - lr: 0.010000
+2022-10-01 00:58:04,050 epoch 2 - iter 7875/8750 - loss 0.09222568 - samples/sec: 71.92 - lr: 0.010000
+2022-10-01 00:59:41,351 epoch 2 - iter 8750/8750 - loss 0.09155321 - samples/sec: 71.98 - lr: 0.010000
+2022-10-01 00:59:41,359 ----------------------------------------------------------------------------------------------------
+2022-10-01 00:59:41,360 EPOCH 2 done: loss 0.0916 - lr 0.010000
+2022-10-01 01:01:38,941 Evaluating as a multi-label problem: False
+2022-10-01 01:01:39,054 DEV : loss 0.04371843859553337 - f1-score (micro avg)  0.9749
+2022-10-01 01:01:40,056 BAD EPOCHS (no improvement): 0
+2022-10-01 01:01:40,058 saving best model
+2022-10-01 01:01:42,979 ----------------------------------------------------------------------------------------------------
+2022-10-01 01:01:42,986 loading file c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word\best-model.pt
+2022-10-01 01:01:46,879 SequenceTagger predicts: Dictionary with 13 tags: O, S-brand, B-brand, E-brand, I-brand, S-size, B-size, E-size, I-size, S-color, B-color, E-color, I-color
+2022-10-01 01:03:40,258 Evaluating as a multi-label problem: False
+2022-10-01 01:03:40,388 0.9719	0.9777	0.9748	0.951
+2022-10-01 01:03:40,389
+Results:
+- F-score (micro) 0.9748
+- F-score (macro) 0.9624
+- Accuracy 0.951
+By class:
+              precision    recall  f1-score   support
+       brand     0.9779    0.9849    0.9814     11779
+        size     0.9780    0.9821    0.9800      3125
+       color     0.9249    0.9264    0.9256      1915
+   micro avg     0.9719    0.9777    0.9748     16819
+   macro avg     0.9603    0.9644    0.9624     16819
+weighted avg     0.9719    0.9777    0.9748     16819
+2022-10-01 01:03:40,391 ----------------------------------------------------------------------------------------------------