varox34 committed on
Commit
0b25125
·
verified ·
1 Parent(s): 366b225

Upload 2 files

Browse files
biaffine-parser-master/Train.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Creating and Training
2
+ ```
3
+ cd biaffine-parser-master
4
+ ```
5
+ ## Perl Installation
6
+
7
+ Follow this link for instructions on installing Perl:
8
+ https://linuxhint.com/install-perl-ubuntu/
9
+
10
+ ## conllu_to_conllx.pl
11
+
12
+ Converts a file in the CoNLL-U format to the old CoNLL-X format.
13
+ ```
14
+ perl conllu_to_conllx.pl < file.conllu > file.conllx
15
+ ```
16
+ Move the resulting .conllx file into the <b>data/ptb/</b> folder.
17
+
18
+ ## Creating tnt_pos_tagger.dill
19
+ ```
20
+ python3.7 hn_pos.py
21
+ ```
22
+ This creates a my_tagger.dill file; rename it to tnt_pos_tagger.dill and move it to the <b>models</b> folder.
23
+
24
+ ## Training the model
25
+ ```
26
+ python3.7 run.py train -p --feat=bert --ftrain=data/ptb/tamtrain.conllx --ftest=data/ptb/tamtest.conllx --fdev=data/ptb/tamdev.conllx
27
+ ```
28
+ ## Evaluate the model
29
+ ```
30
+ python3.7 run.py evaluate --feat=bert --fdata=data/ptb/tamtest.conllx
31
+ ```
32
+ ## Prediction using Web App
33
+ Move up to the parent directory and then run the web app:
34
+ ```
35
+ cd ..
36
+ ```
37
+ ```
38
+ python3.7 app.py
39
+ ```
biaffine-parser-master/conllu_to_conllx.pl ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env perl
2
+ # Converts a CoNLL-U file (Universal Dependencies) to the older CoNLL-X format.
3
+ # The conversion is by definition lossy. It is a lightweight converter: we do not check for validity of the CoNLL-U input!
4
+ # Copyright © 2015, 2017 Dan Zeman <zeman@ufal.mff.cuni.cz>
5
+ # License: GNU GPL
6
+
7
+ use utf8;
8
+ use open ':utf8';
9
+ binmode(STDIN, ':utf8');
10
+ binmode(STDOUT, ':utf8');
11
+ binmode(STDERR, ':utf8');
12
+
13
# Stream the input one line at a time, turning CoNLL-U token rows into CoNLL-X rows.
# Lines that pass the filters but contain no tab (such as blank lines) are printed unchanged.
LINE: while (my $line = <>) {
    # Three kinds of CoNLL-U lines have no CoNLL-X counterpart and are dropped:
    #   - comment lines starting with '#',
    #   - id ranges such as "3-4" (fused surface tokens; the syntactic words are kept instead),
    #   - decimal ids such as "3.1" (empty nodes of the enhanced representation).
    next LINE if $line =~ m/^\#/;
    next LINE if $line =~ m/^\d+-\d+/;
    next LINE if $line =~ m/^\d+\./;

    if ($line =~ m/\t/) {
        # Strip the line ending (LF or CRLF); a plain "\n" is re-attached once the columns are rewritten.
        $line =~ s/\r?\n$//;
        my @columns = split(/\t/, $line);

        # FORM [1] and LEMMA [2] may contain spaces since CoNLL-U v2 (December 2016).
        # Older tools may not survive that, so spaces become underscores.
        for my $index (1, 2) {
            $columns[$index] =~ s/ /_/g;
        }

        # CoNLL-X does not allow an empty POSTAG [4] next to a CPOSTAG [3], and some tools
        # expect CPOSTAG to be inferrable from POSTAG. So an empty POSTAG ('_') becomes a copy
        # of CPOSTAG, and any other POSTAG gets "CPOSTAG_" prepended.
        my $cpostag = $columns[3];
        $columns[4] = ($columns[4] eq '_') ? $cpostag : $cpostag.'_'.$columns[4];

        # Columns [8] and [9] had a different meaning in CoNLL-X; erase them to be on the safe side.
        @columns[8, 9] = ('_', '_');

        $line = join("\t", @columns)."\n";
    }

    print $line;
}