initial model commit
Browse files
README.md
CHANGED
|
@@ -104,16 +104,26 @@ So, the words "*Ich*" and "*they*" are labeled as **pronouns** (PRON), while "*l
|
|
| 104 |
The following Flair script was used to train this model:
|
| 105 |
|
| 106 |
```python
|
| 107 |
-
from flair.data import
|
| 108 |
-
from flair.datasets import
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
corpus
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# 2. what tag do we want to predict?
|
| 119 |
tag_type = 'upos'
|
|
@@ -140,7 +150,8 @@ from flair.models import SequenceTagger
|
|
| 140 |
tagger = SequenceTagger(hidden_size=256,
|
| 141 |
embeddings=embeddings,
|
| 142 |
tag_dictionary=tag_dictionary,
|
| 143 |
-
tag_type=tag_type
|
|
|
|
| 144 |
|
| 145 |
# 6. initialize trainer
|
| 146 |
from flair.trainers import ModelTrainer
|
|
|
|
| 104 |
The following Flair script was used to train this model:
|
| 105 |
|
| 106 |
```python
|
| 107 |
+
from flair.data import MultiCorpus
|
| 108 |
+
from flair.datasets import UD_ENGLISH, UD_GERMAN, UD_FRENCH, UD_ITALIAN, UD_POLISH, UD_DUTCH, UD_CZECH, \
|
| 109 |
+
UD_DANISH, UD_SPANISH, UD_SWEDISH, UD_NORWEGIAN, UD_FINNISH
|
| 110 |
+
from flair.embeddings import StackedEmbeddings, FlairEmbeddings
|
| 111 |
+
|
| 112 |
+
# 1. build a MultiCorpus from 12 UD treebanks (in_memory=False because the combined corpus is too large to hold in memory)
|
| 113 |
+
corpus = MultiCorpus([
|
| 114 |
+
UD_ENGLISH(in_memory=False),
|
| 115 |
+
UD_GERMAN(in_memory=False),
|
| 116 |
+
UD_DUTCH(in_memory=False),
|
| 117 |
+
UD_FRENCH(in_memory=False),
|
| 118 |
+
UD_ITALIAN(in_memory=False),
|
| 119 |
+
UD_SPANISH(in_memory=False),
|
| 120 |
+
UD_POLISH(in_memory=False),
|
| 121 |
+
UD_CZECH(in_memory=False),
|
| 122 |
+
UD_DANISH(in_memory=False),
|
| 123 |
+
UD_SWEDISH(in_memory=False),
|
| 124 |
+
UD_NORWEGIAN(in_memory=False),
|
| 125 |
+
UD_FINNISH(in_memory=False),
|
| 126 |
+
])
|
| 127 |
|
| 128 |
# 2. what tag do we want to predict?
|
| 129 |
tag_type = 'upos'
|
|
|
|
| 150 |
tagger = SequenceTagger(hidden_size=256,
|
| 151 |
embeddings=embeddings,
|
| 152 |
tag_dictionary=tag_dictionary,
|
| 153 |
+
tag_type=tag_type,
|
| 154 |
+
use_crf=False)
|
| 155 |
|
| 156 |
# 6. initialize trainer
|
| 157 |
from flair.trainers import ModelTrainer
|