#!/bin/sh
#
# Assemble the la_evalatin20 train/dev/test CoNLL-U files and the plain-text
# versions of the dev and test sets.
#
# Run from the directory that holds get_evalatin20_split.py; the UD 2.6
# treebanks and conllu_to_text.pl are read from ../ud-2.6. Needs git, python3,
# perl and GNU sed (the script relies on `sed -i` and on `\t` in patterns).
# All output is written below ./la_evalatin20.

# Stop at the first failing step or unset variable instead of carrying on
# with missing or truncated data.
set -eu

out=la_evalatin20
prefix=$out/la_evalatin20
repo=$out/LT4HALA
ud=../ud-2.6

# Append one UD treebank's training file to the combined training set.
#   $1 - treebank name (e.g. la_ittb), also used as the "# variant" label
#   $2 - value passed to get_evalatin20_split.py as --ratio
# The dev part of the split is discarded (--dev=/dev/null).
add_ud_train() {
  printf '%s\n' "# variant = $1" >>"$prefix-train.conllu"
  python3 get_evalatin20_split.py "$ud/$1/$1-ud-train.conllu" --ratio "$2" \
    --train="$prefix-train.conllu" --dev=/dev/null
}

# Fetch the EvaLatin 2020 data. A checkout left behind by an interrupted run
# would make `git clone` fail, so remove it first.
mkdir -p "$out"
rm -rf -- "$repo"
git -C "$out" clone --depth=1 https://github.com/CIRCSE/LT4HALA

# Start train/dev with a variant header, then split the EvaLatin training
# data into them.
# NOTE(review): the header lines only survive if get_evalatin20_split.py
# appends to its --train/--dev files rather than truncating them -- confirm.
printf '%s\n' "# variant = la_evalatin20" >"$prefix-train.conllu"
printf '%s\n' "# variant = la_evalatin20" >"$prefix-dev.conllu"
python3 get_evalatin20_split.py \
  "$repo"/data_and_doc/training_data_10-12-2019/*.conllu \
  --train="$prefix-train.conllu" --dev="$prefix-dev.conllu"

# Extend the training set with the UD 2.6 Latin treebanks.
add_ud_train la_ittb 0.6
add_ud_train la_perseus 1
add_ud_train la_proiel 1

# Patch the combined training file in place:
#   1. drop the duplicated "nescio" field on a token line with ID 14,
#   2. insert the missing second "ne" field on a token line with ID 9,
#   3. replace the last six tab-separated fields of every line that has at
#      least six with "_".
sed -i '
  s#^14\tnescio\tnescio\tnescio\tVERB#14\tnescio\tnescio\tVERB#
  s#^9\tne\tPART#9\tne\tne\tPART#
  s#\(\t[^\t]*\)\{6\}$#\t_\t_\t_\t_\t_\t_#
' "$prefix-train.conllu"

# Build the three test sets. Each gold file is prefixed with a "# newdoc id"
# line carrying its file name and routed by that name: Horatius-Carmina goes
# to the cross-genre set, SummaContraGentiles_IV to the cross-time set,
# everything else to the regular test set.
for name in test test-crossgenre test-crosstime; do
  printf '%s\n' "# variant = la_evalatin20" >"$prefix-$name.conllu"
done
for f in "$repo"/data_and_doc/gold_EvaLatin/*.conllu; do
  base=${f##*/}
  case "$base" in
    Horatius-Carmina_GOLD.conllu) name=test-crossgenre ;;
    SummaContraGentiles_IV_GOLD.conllu) name=test-crosstime ;;
    *) name=test ;;
  esac
  {
    printf '%s\n' "# newdoc id = $base"
    cat -- "$f"
  } >>"$prefix-$name.conllu"
done

# Produce the plain-text counterpart of the dev set and of every test set.
for conllu in "$prefix-dev.conllu" "$prefix"-test*.conllu; do
  perl "$ud/conllu_to_text.pl" --language=la <"$conllu" >"${conllu%.conllu}.txt"
done

# The upstream checkout is no longer needed.
rm -rf -- "$repo"