# Pipeline definition (DVC dvc.yaml layout) for the Annif YSO projects:
# create a virtualenv, load the YSO vocabulary, train one project per backend
# and language (fi, sv, en), then evaluate each project on the full-text test
# corpora and write the scores to reports/*.json.
stages:

  # Create the virtualenv and install the pinned requirements.
  install:
    cmd:
      - python3 -m venv venv
      # Call the venv's interpreter directly instead of `source .../activate`:
      # `source` is a bash builtin and fails when the stage runs under a
      # POSIX /bin/sh. This also matches the venv/bin/annif calls below.
      - venv/bin/python -m pip install -U pip wheel setuptools
      - venv/bin/python -m pip install -r requirements.txt
      # Marker file: a copy of requirements.txt written only after the
      # install commands succeeded. Later stages depend on this file rather
      # than on the venv directory itself.
      - cp requirements.txt venv-installed
    deps:
      - requirements.txt
    outs:
      - venv-installed:
          cache: false

  # Load the YSO SKOS vocabulary file into Annif under the vocabulary id "yso".
  load-vocab:
    cmd: venv/bin/annif load-vocab --force yso corpora/yso-skos.ttl
    deps:
      - venv-installed
      - corpora/yso-skos.ttl
    outs:
      - data/vocabs/yso

  # Train yso-mllm-<lang> on the full-text training corpus of each language.
  # The params entry ties the stage to that project's section of projects.toml.
  train-mllm:
    foreach:
      - fi
      - sv
      - en
    do:
      cmd: venv/bin/annif train yso-mllm-${item} corpora/fulltext-train/${item}/*/ -j 16
      deps:
        - venv-installed
        - corpora/fulltext-train/${item}
        - data/vocabs/yso
      params:
        - projects.toml:
            - yso-mllm-${item}
      outs:
        - data/projects/yso-mllm-${item}

  # Train yso-bonsai-<lang> on the short-text (yso-finna) training files.
  # Note: the stage is named "omikuji" but the project id is yso-bonsai-<lang>.
  train-omikuji:
    foreach:
      - fi
      - sv
      - en
    do:
      cmd: venv/bin/annif train yso-bonsai-${item} corpora/shorttext-train/${item}/yso-finna-${item}*.tsv.gz
      deps:
        - venv-installed
        - corpora/shorttext-train/${item}/
        - data/vocabs/yso
      params:
        - projects.toml:
            - yso-bonsai-${item}
      outs:
        - data/projects/yso-bonsai-${item}

  # Train yso-fasttext-<lang> on the same short-text files as train-omikuji.
  train-fasttext:
    foreach:
      - fi
      - sv
      - en
    do:
      cmd: venv/bin/annif train yso-fasttext-${item} corpora/shorttext-train/${item}/yso-finna-${item}*.tsv.gz
      deps:
        - venv-installed
        - corpora/shorttext-train/${item}/
        - data/vocabs/yso
      params:
        - projects.toml:
            - yso-fasttext-${item}
      outs:
        - data/projects/yso-fasttext-${item}

  # Train the combined project yso-<lang> on the full-text training corpus.
  # It lists the mllm, bonsai and fasttext project directories as deps, so it
  # reruns whenever any of those three projects is retrained.
  train-nn-ensemble:
    foreach:
      - fi
      - sv
      - en
    do:
      cmd: venv/bin/annif train yso-${item} corpora/fulltext-train/${item}/*/ -j 16
      deps:
        - venv-installed
        - corpora/fulltext-train/${item}
        - data/vocabs/yso
        - data/projects/yso-mllm-${item}
        - data/projects/yso-bonsai-${item}
        - data/projects/yso-fasttext-${item}
      params:
        - projects.toml:
            - yso-${item}
      outs:
        - data/projects/yso-${item}

  # Evaluate every Finnish project (three base projects plus the combined
  # "fi" project) on each Finnish full-text test set, recording F1@5 and NDCG.
  # One metrics file is written per project/test-set pair; the files are kept
  # out of the DVC cache (cache: false).
  eval-fi:
    foreach:
      - mllm-fi
      - bonsai-fi
      - fasttext-fi
      - fi
    do:
      cmd:
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-jyu-theses.json corpora/fulltext-test/fi/jyu-theses/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-kirjaesittelyt2021.json corpora/fulltext-test/fi/kirjaesittelyt2021/
        - venv/bin/annif eval yso-${item} -j 1 -m F1@5 -m NDCG --metrics-file reports/${item}-kirjastonhoitaja.json corpora/fulltext-test/fi/kirjastonhoitaja/
        # The `?` is a shell glob: all satakunnan-kansa-<x> directories are
        # passed to one eval run and scored into a single report.
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-satakunnan-kansa.json corpora/fulltext-test/fi/satakunnan-kansa-?/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-vapaakappaleet-orig.json corpora/fulltext-test/fi/vapaakappaleet-orig/
      deps:
        - venv-installed
        - corpora/fulltext-test/fi
        - data/projects/yso-${item}
      params:
        - projects.toml:
            - yso-${item}
      metrics:
        - reports/${item}-jyu-theses.json:
            cache: false
        - reports/${item}-kirjaesittelyt2021.json:
            cache: false
        - reports/${item}-kirjastonhoitaja.json:
            cache: false
        - reports/${item}-satakunnan-kansa.json:
            cache: false
        - reports/${item}-vapaakappaleet-orig.json:
            cache: false

  # Evaluate every Swedish project on each Swedish full-text test set
  # (same metrics and report naming scheme as eval-fi).
  eval-sv:
    foreach:
      - mllm-sv
      - bonsai-sv
      - fasttext-sv
      - sv
    do:
      cmd:
        - venv/bin/annif eval yso-${item} -j 1 -m F1@5 -m NDCG --metrics-file reports/${item}-abo-theses.json corpora/fulltext-test/sv/abo-theses/
        - venv/bin/annif eval yso-${item} -j 1 -m F1@5 -m NDCG --metrics-file reports/${item}-jyu-theses.json corpora/fulltext-test/sv/jyu-theses/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-kirjaesittelyt2021.json corpora/fulltext-test/sv/kirjaesittelyt2021/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-vapaakappaleet-orig.json corpora/fulltext-test/sv/vapaakappaleet-orig/
      deps:
        - venv-installed
        - corpora/fulltext-test/sv
        - data/projects/yso-${item}
      params:
        - projects.toml:
            - yso-${item}
      metrics:
        - reports/${item}-abo-theses.json:
            cache: false
        - reports/${item}-jyu-theses.json:
            cache: false
        - reports/${item}-kirjaesittelyt2021.json:
            cache: false
        - reports/${item}-vapaakappaleet-orig.json:
            cache: false

  # Evaluate every English project on each English full-text test set
  # (same metrics and report naming scheme as eval-fi).
  eval-en:
    foreach:
      - mllm-en
      - bonsai-en
      - fasttext-en
      - en
    do:
      cmd:
        - venv/bin/annif eval yso-${item} -j 1 -m F1@5 -m NDCG --metrics-file reports/${item}-abo-theses.json corpora/fulltext-test/en/abo-theses/
        - venv/bin/annif eval yso-${item} -j 1 -m F1@5 -m NDCG --metrics-file reports/${item}-jyu-theses.json corpora/fulltext-test/en/jyu-theses/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-kirjaesittelyt2021.json corpora/fulltext-test/en/kirjaesittelyt2021/
        - venv/bin/annif eval yso-${item} -j 10 -m F1@5 -m NDCG --metrics-file reports/${item}-vapaakappaleet-orig.json corpora/fulltext-test/en/vapaakappaleet-orig/
      deps:
        - venv-installed
        - corpora/fulltext-test/en
        - data/projects/yso-${item}
      params:
        - projects.toml:
            - yso-${item}
      metrics:
        - reports/${item}-abo-theses.json:
            cache: false
        - reports/${item}-jyu-theses.json:
            cache: false
        - reports/${item}-kirjaesittelyt2021.json:
            cache: false
        - reports/${item}-vapaakappaleet-orig.json:
            cache: false