Upload 2 files
Browse files
biaffine-parser-master/Train.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Creating and Training
|
| 2 |
+
```
|
| 3 |
+
cd biaffine-parser-master
|
| 4 |
+
```
|
| 5 |
+
## Perl Installation
|
| 6 |
+
|
| 7 |
+
Follow this guide to install Perl:
|
| 8 |
+
https://linuxhint.com/install-perl-ubuntu/
|
| 9 |
+
|
| 10 |
+
## conllu_to_conllx.pl
|
| 11 |
+
|
| 12 |
+
Converts a file in the CoNLL-U format to the old CoNLL-X format.
|
| 13 |
+
```
|
| 14 |
+
perl conllu_to_conllx.pl < file.conllu > file.conllx
|
| 15 |
+
```
|
| 16 |
+
Move the resulting .conllx file into the <b>data/ptb/</b> folder.
|
| 17 |
+
|
| 18 |
+
## Creating tnt_pos_tagger.dill
|
| 19 |
+
```
|
| 20 |
+
python3.7 hn_pos.py
|
| 21 |
+
```
|
| 22 |
+
This creates a file named my_tagger.dill. Rename it to tnt_pos_tagger.dill and move it to the <b>models</b> folder.
|
| 23 |
+
|
| 24 |
+
## Training the model
|
| 25 |
+
```
|
| 26 |
+
python3.7 run.py train -p --feat=bert --ftrain=data/ptb/tamtrain.conllx --ftest=data/ptb/tamtest.conllx --fdev=data/ptb/tamdev.conllx
|
| 27 |
+
```
|
| 28 |
+
## Evaluate the model
|
| 29 |
+
```
|
| 30 |
+
python3.7 run.py evaluate --feat=bert --fdata=data/ptb/tamtest.conllx
|
| 31 |
+
```
|
| 32 |
+
## Prediction using Web App
|
| 33 |
+
Move up to the parent directory, then run the web app:
|
| 34 |
+
```
|
| 35 |
+
cd ..
|
| 36 |
+
```
|
| 37 |
+
```
|
| 38 |
+
python3.7 app.py
|
| 39 |
+
```
|
biaffine-parser-master/conllu_to_conllx.pl
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env perl
|
| 2 |
+
# Converts a CoNLL-U file (Universal Dependencies) to the older CoNLL-X format.
|
| 3 |
+
# The conversion is by definition lossy. It is a lightweight converter: we do not check for validity of the CoNLL-U input!
|
| 4 |
+
# Copyright © 2015, 2017 Dan Zeman <zeman@ufal.mff.cuni.cz>
|
| 5 |
+
# License: GNU GPL
|
| 6 |
+
|
| 7 |
+
use utf8;
|
| 8 |
+
use open ':utf8';
|
| 9 |
+
binmode(STDIN, ':utf8');
|
| 10 |
+
binmode(STDOUT, ':utf8');
|
| 11 |
+
binmode(STDERR, ':utf8');
|
| 12 |
+
|
| 13 |
+
# Main filter loop: read the input line by line (files named on the command
# line, or STDIN), rewrite every tab-separated token line from CoNLL-U column
# conventions to CoNLL-X ones, and print the result.
LINE: while (my $line = <>) {
    # Drop lines that have no counterpart in the output:
    #   - sentence-level comment lines (leading '#'),
    #   - fused surface token ranges such as "3-4" (syntactic words are the
    #     node-level unit in the output file),
    #   - empty nodes from the enhanced representation such as "3.1".
    next LINE if $line =~ m/^\#/ || $line =~ m/^\d+-\d+/ || $line =~ m/^\d+\./;

    # Anything without a tab is not a token line (for example the blank
    # lines between sentences); pass it through exactly as read.
    if ($line !~ m/\t/) {
        print $line;
        next LINE;
    }

    # Strip the line terminator (LF or CRLF); a plain "\n" is re-added below.
    $line =~ s/\r?\n$//;
    my @columns = split(/\t/, $line);

    # CoNLL-U v2 (December 2016) permits spaces inside FORM ([1]) and
    # LEMMA ([2]); older tools may not survive them, so use underscores.
    for my $index (1, 2) {
        $columns[$index] =~ s/ /_/g;
    }

    # CoNLL-X did not allow POSTAG ([4]) to be empty when CPOSTAG ([3]) was
    # present, and some tools expect CPOSTAG to be inferrable from POSTAG.
    # So: copy CPOSTAG when POSTAG is '_', otherwise prefix POSTAG with it.
    $columns[4] = $columns[4] eq '_'
        ? $columns[3]
        : $columns[3] . '_' . $columns[4];

    # Columns [8] and [9] had a different meaning in CoNLL-X, and some tools
    # require '_' or a number in [8]. Blank both to be on the safe side.
    @columns[8, 9] = ('_', '_');

    print join("\t", @columns) . "\n";
}
|