| schema: '2.0' |
| stages: |
| install: |
| cmd: |
| - python3 -m venv venv |
| - source venv/bin/activate && pip install -U pip wheel setuptools && pip install |
| -r requirements.txt |
| - cp requirements.txt venv-installed |
| deps: |
| - path: requirements.txt |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| outs: |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| load-vocab: |
| cmd: venv/bin/annif load-vocab --force yso corpora/yso-skos.ttl |
| deps: |
| - path: corpora/yso-skos.ttl |
| hash: md5 |
| md5: c3d9a5148c46efa4fbf11ee866154ebf |
| size: 32953533 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| outs: |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| train-mllm@fi: |
| cmd: venv/bin/annif train yso-mllm-fi corpora/fulltext-train/fi/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/fi |
| hash: md5 |
| md5: f5c1820afb398fa8145181cf22905336.dir |
| size: 413860133 |
| nfiles: 5583 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-fi: |
| name: YSO MLLM Finnish |
| language: fi |
| backend: mllm |
| analyzer: voikko(fi) |
| vocab: yso |
| limit: '1000' |
| transform: limit(3000000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-mllm-fi |
| hash: md5 |
| md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir |
| size: 36157873 |
| nfiles: 2 |
| train-mllm@en: |
| cmd: venv/bin/annif train yso-mllm-en corpora/fulltext-train/en/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/en |
| hash: md5 |
| md5: 426f141f77c5bac77b32784e4b827d31.dir |
| size: 268351812 |
| nfiles: 4584 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-en: |
| name: YSO MLLM English |
| language: en |
| backend: mllm |
| analyzer: snowball(english) |
| vocab: yso |
| limit: '1000' |
| transform: limit(2500000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-mllm-en |
| hash: md5 |
| md5: 129793bd06d231413a66ed5611180dbe.dir |
| size: 39175771 |
| nfiles: 2 |
| train-mllm@sv: |
| cmd: venv/bin/annif train yso-mllm-sv corpora/fulltext-train/sv/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/sv |
| hash: md5 |
| md5: c64480b5f34b1895db972d774abe12cc.dir |
| size: 155098642 |
| nfiles: 3754 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-sv: |
| name: YSO MLLM Swedish |
| language: sv |
| backend: mllm |
| analyzer: snowball(swedish) |
| vocab: yso |
| limit: '1000' |
| transform: limit(3000000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-mllm-sv |
| hash: md5 |
| md5: 88dd945c235bdfc4549fa44f5ea582a4.dir |
| size: 19736546 |
| nfiles: 2 |
| train-omikuji@sv: |
| cmd: venv/bin/annif train yso-bonsai-sv corpora/shorttext-train/sv/yso-finna-sv*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/sv/ |
| hash: md5 |
| md5: 33a49e42ec12daf0c973c792928a2cb0.dir |
| size: 40515364 |
| nfiles: 3 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-sv: |
| name: YSO Omikuji Bonsai Swedish |
| language: sv |
| backend: omikuji |
| analyzer: snowball(swedish) |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '2' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-bonsai-sv |
| hash: md5 |
| md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir |
| size: 1029311381 |
| nfiles: 6 |
| train-omikuji@en: |
| cmd: venv/bin/annif train yso-bonsai-en corpora/shorttext-train/en/yso-finna-en*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/en/ |
| hash: md5 |
| md5: 19e76af78210f39cf02a5c8bbee38f60.dir |
| size: 98829194 |
| nfiles: 3 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-en: |
| name: YSO Omikuji Bonsai English |
| language: en |
| backend: omikuji |
| analyzer: simple |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '5' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-bonsai-en |
| hash: md5 |
| md5: b1a5a925fbe1a11c153ce3133176c717.dir |
| size: 2321603849 |
| nfiles: 6 |
| train-omikuji@fi: |
| cmd: venv/bin/annif train yso-bonsai-fi corpora/shorttext-train/fi/yso-finna-fi*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/fi/ |
| hash: md5 |
| md5: 5a2f47124433a7215e6c391a48c0aeca.dir |
| size: 358561386 |
| nfiles: 9 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-fi: |
| name: YSO Omikuji Bonsai Finnish |
| language: fi |
| backend: omikuji |
| analyzer: snowball(finnish) |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '5' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-bonsai-fi |
| hash: md5 |
| md5: 84a1ae3ca24702d0cbb901f3215e1675.dir |
| size: 5354349482 |
| nfiles: 6 |
| train-fasttext@sv: |
| cmd: venv/bin/annif train yso-fasttext-sv corpora/shorttext-train/sv/yso-finna-sv*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/sv/ |
| hash: md5 |
| md5: 33a49e42ec12daf0c973c792928a2cb0.dir |
| size: 40515364 |
| nfiles: 3 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-sv: |
| name: YSO fastText Swedish |
| language: sv |
| backend: fasttext |
| analyzer: snowball(swedish) |
| dim: '560' |
| lr: '0.974349' |
| epoch: '110' |
| minn: '2' |
| maxn: '6' |
| minCount: '2' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-fasttext-sv |
| hash: md5 |
| md5: eccbf5ba8ce07f2777b4bee583ed783b.dir |
| size: 4913883852 |
| nfiles: 2 |
| train-fasttext@fi: |
| cmd: venv/bin/annif train yso-fasttext-fi corpora/shorttext-train/fi/yso-finna-fi*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/fi/ |
| hash: md5 |
| md5: 5a2f47124433a7215e6c391a48c0aeca.dir |
| size: 358561386 |
| nfiles: 9 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-fi: |
| name: YSO fastText Finnish |
| language: fi |
| backend: fasttext |
| analyzer: voikko(fi) |
| dim: '660' |
| lr: '0.506539' |
| epoch: '75' |
| minn: '2' |
| maxn: '7' |
| minCount: '2' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-fasttext-fi |
| hash: md5 |
| md5: 06ea14ea97351f72fbaeb13517011c88.dir |
| size: 7547313091 |
| nfiles: 2 |
| train-fasttext@en: |
| cmd: venv/bin/annif train yso-fasttext-en corpora/shorttext-train/en/yso-finna-en*.tsv.gz |
| deps: |
| - path: corpora/shorttext-train/en/ |
| hash: md5 |
| md5: 19e76af78210f39cf02a5c8bbee38f60.dir |
| size: 98829194 |
| nfiles: 3 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-en: |
| name: YSO fastText English |
| language: en |
| backend: fasttext |
| analyzer: snowball(english) |
| dim: '430' |
| lr: '0.506539' |
| epoch: '115' |
| minn: '4' |
| maxn: '5' |
| minCount: '1' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: data/projects/yso-fasttext-en |
| hash: md5 |
| md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir |
| size: 4091197300 |
| nfiles: 2 |
| train-nn-ensemble@sv: |
| cmd: venv/bin/annif train yso-sv corpora/fulltext-train/sv/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/sv |
| hash: md5 |
| md5: c64480b5f34b1895db972d774abe12cc.dir |
| size: 155098642 |
| nfiles: 3754 |
| - path: data/projects/yso-bonsai-sv |
| hash: md5 |
| md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir |
| size: 1029311381 |
| nfiles: 6 |
| - path: data/projects/yso-fasttext-sv |
| hash: md5 |
| md5: eccbf5ba8ce07f2777b4bee583ed783b.dir |
| size: 4913883852 |
| nfiles: 2 |
| - path: data/projects/yso-mllm-sv |
| hash: md5 |
| md5: 88dd945c235bdfc4549fa44f5ea582a4.dir |
| size: 19736546 |
| nfiles: 2 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-sv: |
| name: ALLFO svenska (2023.1.Ghosha) |
| language: sv |
| backend: nn_ensemble |
| sources: yso-mllm-sv:0.1439,yso-fasttext-sv:0.3302,yso-bonsai-sv:0.5259 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: data/projects/yso-sv |
| hash: md5 |
| md5: 9cab94546614acf5ac28514687cf26ad.dir |
| size: 1259460957 |
| nfiles: 3 |
| train-nn-ensemble@en: |
| cmd: venv/bin/annif train yso-en corpora/fulltext-train/en/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/en |
| hash: md5 |
| md5: 426f141f77c5bac77b32784e4b827d31.dir |
| size: 268351812 |
| nfiles: 4584 |
| - path: data/projects/yso-bonsai-en |
| hash: md5 |
| md5: b1a5a925fbe1a11c153ce3133176c717.dir |
| size: 2321603849 |
| nfiles: 6 |
| - path: data/projects/yso-fasttext-en |
| hash: md5 |
| md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir |
| size: 4091197300 |
| nfiles: 2 |
| - path: data/projects/yso-mllm-en |
| hash: md5 |
| md5: 129793bd06d231413a66ed5611180dbe.dir |
| size: 39175771 |
| nfiles: 2 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-en: |
| name: YSO English (2023.1.Ghosha) |
| language: en |
| backend: nn_ensemble |
| sources: yso-mllm-en:0.3426,yso-fasttext-en:0.1419,yso-bonsai-en:0.5155 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: data/projects/yso-en |
| hash: md5 |
| md5: cf09853815a7e8c621b352c2bc70f9e1.dir |
| size: 1259460957 |
| nfiles: 3 |
| train-nn-ensemble@fi: |
| cmd: venv/bin/annif train yso-fi corpora/fulltext-train/fi/*/ -j 16 |
| deps: |
| - path: corpora/fulltext-train/fi |
| hash: md5 |
| md5: f5c1820afb398fa8145181cf22905336.dir |
| size: 413860133 |
| nfiles: 5583 |
| - path: data/projects/yso-bonsai-fi |
| hash: md5 |
| md5: 84a1ae3ca24702d0cbb901f3215e1675.dir |
| size: 5354349482 |
| nfiles: 6 |
| - path: data/projects/yso-fasttext-fi |
| hash: md5 |
| md5: 06ea14ea97351f72fbaeb13517011c88.dir |
| size: 7547313091 |
| nfiles: 2 |
| - path: data/projects/yso-mllm-fi |
| hash: md5 |
| md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir |
| size: 36157873 |
| nfiles: 2 |
| - path: data/vocabs/yso |
| hash: md5 |
| md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir |
| size: 61626265 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fi: |
| name: YSO suomi (2023.1.Ghosha) |
| language: fi |
| backend: nn_ensemble |
| sources: yso-mllm-fi:0.1492,yso-fasttext-fi:0.6090,yso-bonsai-fi:0.2418 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: data/projects/yso-fi |
| hash: md5 |
| md5: a0961bd4dfcfee8367b22ad17d780dbb.dir |
| size: 1259460957 |
| nfiles: 3 |
| eval-fi@mllm: |
| cmd: |
| - venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-mllm-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| md5: 169cae26a0b93733aed807f8e9d9ca40.dir |
| size: 385626157 |
| nfiles: 7267 |
| - path: data/projects/yso-mllm-fi |
| md5: c42dd794bb146ecf7cf60e5f49167ab2.dir |
| size: 36078206 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/mllm-fi-jyu-theses.json |
| md5: 017241308807e76f5c4ecc675c280bfa |
| size: 92 |
| - path: reports/mllm-fi-kirjaesittelyt2021.json |
| md5: b289df9d0158a0e8c6cd74dd2df7d9c2 |
| size: 94 |
| - path: reports/mllm-fi-kirjastonhoitaja.json |
| md5: 1ed10e3905a72ffe5181bc74ec27599d |
| size: 93 |
| - path: reports/mllm-fi-satakunnan-kansa.json |
| md5: 7100b02bb18dad3f8e8d073b7bff9e50 |
| size: 93 |
| - path: reports/mllm-fi-vapaakappaleet-orig.json |
| md5: bb3ed842b8468708c1c064452b6b24f1 |
| size: 93 |
| eval-fi@bonsai: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-bonsai-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| md5: 169cae26a0b93733aed807f8e9d9ca40.dir |
| size: 385626157 |
| nfiles: 7267 |
| - path: data/projects/yso-bonsai-fi |
| md5: e065094c56ff3a61e2a4648e47156c8f.dir |
| size: 5277448954 |
| nfiles: 6 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/bonsai-fi-jyu-theses.json |
| md5: 660095be4380591aa8c7f839ffbf1aa4 |
| size: 92 |
| - path: reports/bonsai-fi-kirjaesittelyt2021.json |
| md5: 61540b3360540911b4ec3a934abb74ca |
| size: 94 |
| - path: reports/bonsai-fi-kirjastonhoitaja.json |
| md5: 95696330bd44571520752bdb4a460287 |
| size: 93 |
| - path: reports/bonsai-fi-satakunnan-kansa.json |
| md5: 2bb5923811b0760695cb5ad2465469e5 |
| size: 93 |
| - path: reports/bonsai-fi-vapaakappaleet-orig.json |
| md5: d369966bf45650f6b9643469343ab65d |
| size: 93 |
| eval-fi@fasttext: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-fasttext-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| md5: 169cae26a0b93733aed807f8e9d9ca40.dir |
| size: 385626157 |
| nfiles: 7267 |
| - path: data/projects/yso-fasttext-fi |
| md5: 0f5c9eb966671d610e8d8556b270652c.dir |
| size: 7519964475 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/fasttext-fi-jyu-theses.json |
| md5: 339a05b2faa098074a7717ef6677048b |
| size: 93 |
| - path: reports/fasttext-fi-kirjaesittelyt2021.json |
| md5: b0425400c2513d8580bf726bc0b487d3 |
| size: 94 |
| - path: reports/fasttext-fi-kirjastonhoitaja.json |
| md5: 7fe1e06726e05effef46a1cb97078d72 |
| size: 92 |
| - path: reports/fasttext-fi-satakunnan-kansa.json |
| md5: 63b1b03ad124c17bc3570188f1855ec9 |
| size: 93 |
| - path: reports/fasttext-fi-vapaakappaleet-orig.json |
| md5: 0acd6e385633742bd18513109a5a87dc |
| size: 94 |
| eval-sv@mllm: |
| cmd: |
| - venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| md5: 2d29064639aa0f8900e4bf58a781826e.dir |
| size: 66510456 |
| nfiles: 1876 |
| - path: data/projects/yso-mllm-sv |
| md5: fc2e344bac4abd0048c7c286ffdba0eb.dir |
| size: 19904278 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/mllm-sv-abo-theses.json |
| md5: 0d6f871b92dfb263c56e17f644e99513 |
| size: 92 |
| - path: reports/mllm-sv-jyu-theses.json |
| md5: 81f8eb0a6908169aeb2ba236a0baf3f7 |
| size: 91 |
| - path: reports/mllm-sv-kirjaesittelyt2021.json |
| md5: 0886460a79805dd7698d6c7f60f2ab2d |
| size: 93 |
| - path: reports/mllm-sv-vapaakappaleet-orig.json |
| md5: d55b237633198997361b7aab0ba7d10b |
| size: 92 |
| eval-sv@bonsai: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| md5: 2d29064639aa0f8900e4bf58a781826e.dir |
| size: 66510456 |
| nfiles: 1876 |
| - path: data/projects/yso-bonsai-sv |
| md5: 325628ed10b5330b42fdf93489ab870d.dir |
| size: 1035735788 |
| nfiles: 6 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/bonsai-sv-abo-theses.json |
| md5: 65fd5a684e967d01b8527489b6ca38c3 |
| size: 93 |
| - path: reports/bonsai-sv-jyu-theses.json |
| md5: 69084213079115f7f97cf16115d58010 |
| size: 91 |
| - path: reports/bonsai-sv-kirjaesittelyt2021.json |
| md5: 1c2fe10783ce03e22dede97beb120c65 |
| size: 93 |
| - path: reports/bonsai-sv-vapaakappaleet-orig.json |
| md5: f6e85d73d66e9f6cfc0cfa359ba6de73 |
| size: 92 |
| eval-sv@fasttext: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| md5: 2d29064639aa0f8900e4bf58a781826e.dir |
| size: 66510456 |
| nfiles: 1876 |
| - path: data/projects/yso-fasttext-sv |
| md5: 8d272558b0ec13606c0027af74be47e8.dir |
| size: 4915577395 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/fasttext-sv-abo-theses.json |
| md5: ea4e5c8d3c72e853ce0bda1757cd4e41 |
| size: 93 |
| - path: reports/fasttext-sv-jyu-theses.json |
| md5: 8e73efa922698dd1fa968f6dba4c779c |
| size: 92 |
| - path: reports/fasttext-sv-kirjaesittelyt2021.json |
| md5: 42fa4ff3fa951ecf07ce404a1c68d990 |
| size: 93 |
| - path: reports/fasttext-sv-vapaakappaleet-orig.json |
| md5: c8c0290553be948bbd127572efc0fa95 |
| size: 93 |
| eval-en@fasttext: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-en -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| md5: e6fd23c87a07631f24e52f568fad23ea.dir |
| size: 331772939 |
| nfiles: 3825 |
| - path: data/projects/yso-fasttext-en |
| md5: 625be5253a0eee61c44741e63acb1021.dir |
| size: 4077282108 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/fasttext-en-abo-theses.json |
| md5: 66940ff0ea42d44ab1b429905c51858a |
| size: 93 |
| - path: reports/fasttext-en-jyu-theses.json |
| md5: 8bdfe2503c1a002d31121f48daf493db |
| size: 92 |
| - path: reports/fasttext-en-kirjaesittelyt2021.json |
| md5: 1d8d4ce09c768c644d34382497b5bb15 |
| size: 92 |
| - path: reports/fasttext-en-vapaakappaleet-orig.json |
| md5: a8060ee8a76b5b3e7a5134d0edfdc5d9 |
| size: 92 |
| eval-en@mllm: |
| cmd: |
| - venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-en -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| md5: e6fd23c87a07631f24e52f568fad23ea.dir |
| size: 331772939 |
| nfiles: 3825 |
| - path: data/projects/yso-mllm-en |
| md5: 03fe928341b019387a004ce33b85b220.dir |
| size: 38643312 |
| nfiles: 2 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/mllm-en-abo-theses.json |
| md5: 0cc714a7ee3c0a5d42f941da5719dd8c |
| size: 92 |
| - path: reports/mllm-en-jyu-theses.json |
| md5: c8bdc5f4f57799201b97ed62191c608c |
| size: 92 |
| - path: reports/mllm-en-kirjaesittelyt2021.json |
| md5: 121d0ac40869b4dd7b0a1fc6094e2c42 |
| size: 94 |
| - path: reports/mllm-en-vapaakappaleet-orig.json |
| md5: e763f75300be26b3699d40a4cc119526 |
| size: 94 |
| eval-en@bonsai: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-en -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| md5: e6fd23c87a07631f24e52f568fad23ea.dir |
| size: 331772939 |
| nfiles: 3825 |
| - path: data/projects/yso-bonsai-en |
| md5: 7b3c8486bbce25710e969ce706e9ac71.dir |
| size: 2246619174 |
| nfiles: 6 |
| - path: venv-installed |
| md5: abf841d22e1cdf25eb9e9ef2368c240d |
| size: 49 |
| outs: |
| - path: reports/bonsai-en-abo-theses.json |
| md5: 109beeb488b023241e61fb4fddba1d35 |
| size: 93 |
| - path: reports/bonsai-en-jyu-theses.json |
| md5: f917141ae8af29e6834359c35998e98d |
| size: 92 |
| - path: reports/bonsai-en-kirjaesittelyt2021.json |
| md5: 711601791fe7245a961e0bd5fdb6dccd |
| size: 92 |
| - path: reports/bonsai-en-vapaakappaleet-orig.json |
| md5: 2324260b996366b775fbf3c38a14a309 |
| size: 93 |
| eval-sv@fasttext-sv: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| hash: md5 |
| md5: 8424019ec4c261915627865324ac2f73.dir |
| size: 66737727 |
| nfiles: 1878 |
| - path: data/projects/yso-fasttext-sv |
| hash: md5 |
| md5: eccbf5ba8ce07f2777b4bee583ed783b.dir |
| size: 4913883852 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-sv: |
| name: YSO fastText Swedish |
| language: sv |
| backend: fasttext |
| analyzer: snowball(swedish) |
| dim: '560' |
| lr: '0.974349' |
| epoch: '110' |
| minn: '2' |
| maxn: '6' |
| minCount: '2' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: reports/fasttext-sv-abo-theses.json |
| hash: md5 |
| md5: 50e4860a0829e104eb2051c13b7786c2 |
| size: 94 |
| - path: reports/fasttext-sv-jyu-theses.json |
| hash: md5 |
| md5: 194c5356f9792f2d6938c26ffc935f37 |
| size: 91 |
| - path: reports/fasttext-sv-kirjaesittelyt2021.json |
| hash: md5 |
| md5: c49bb0271033d10d951ccce116a0c0c4 |
| size: 93 |
| - path: reports/fasttext-sv-vapaakappaleet-orig.json |
| hash: md5 |
| md5: a85d38c085eac4297435d392077cb7f6 |
| size: 94 |
| eval-sv@sv: |
| cmd: |
| - venv/bin/annif eval yso-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| hash: md5 |
| md5: 8424019ec4c261915627865324ac2f73.dir |
| size: 66737727 |
| nfiles: 1878 |
| - path: data/projects/yso-sv |
| hash: md5 |
| md5: 9cab94546614acf5ac28514687cf26ad.dir |
| size: 1259460957 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-sv: |
| name: ALLFO svenska (2023.1.Ghosha) |
| language: sv |
| backend: nn_ensemble |
| sources: yso-mllm-sv:0.1439,yso-fasttext-sv:0.3302,yso-bonsai-sv:0.5259 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: reports/sv-abo-theses.json |
| hash: md5 |
| md5: 87917904500a6eefe80e1924a0cebe99 |
| size: 92 |
| - path: reports/sv-jyu-theses.json |
| hash: md5 |
| md5: bb440b30d4c995bc9d9ffef140b53817 |
| size: 91 |
| - path: reports/sv-kirjaesittelyt2021.json |
| hash: md5 |
| md5: e9b35e7f38fa85945eb1278bdf01ed30 |
| size: 92 |
| - path: reports/sv-vapaakappaleet-orig.json |
| hash: md5 |
| md5: dbd8e93725f32ba3fcba848e9cb61876 |
| size: 92 |
| eval-en@bonsai-en: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| hash: md5 |
| md5: f1883db613d00c7bce08a1211787f984.dir |
| size: 332654375 |
| nfiles: 3831 |
| - path: data/projects/yso-bonsai-en |
| hash: md5 |
| md5: b1a5a925fbe1a11c153ce3133176c717.dir |
| size: 2321603849 |
| nfiles: 6 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-en: |
| name: YSO Omikuji Bonsai English |
| language: en |
| backend: omikuji |
| analyzer: simple |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '5' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: reports/bonsai-en-abo-theses.json |
| hash: md5 |
| md5: 23fb86f0954118ead50392c7801ce834 |
| size: 91 |
| - path: reports/bonsai-en-jyu-theses.json |
| hash: md5 |
| md5: 0f8b8c4db657ed8aac3b494a98fa6c15 |
| size: 92 |
| - path: reports/bonsai-en-kirjaesittelyt2021.json |
| hash: md5 |
| md5: f047174bbd0de4588072412f2431d288 |
| size: 92 |
| - path: reports/bonsai-en-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 3091f929ce7e0b070d7fc2dfd0fff4c0 |
| size: 92 |
| eval-en@en: |
| cmd: |
| - venv/bin/annif eval yso-en -j 1 -m F1@5 -m NDCG --metrics-file reports/en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-en -j 1 -m F1@5 -m NDCG --metrics-file reports/en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-en -j 10 -m F1@5 -m NDCG --metrics-file reports/en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-en -j 10 -m F1@5 -m NDCG --metrics-file reports/en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| hash: md5 |
| md5: f1883db613d00c7bce08a1211787f984.dir |
| size: 332654375 |
| nfiles: 3831 |
| - path: data/projects/yso-en |
| hash: md5 |
| md5: cf09853815a7e8c621b352c2bc70f9e1.dir |
| size: 1259460957 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-en: |
| name: YSO English (2023.1.Ghosha) |
| language: en |
| backend: nn_ensemble |
| sources: yso-mllm-en:0.3426,yso-fasttext-en:0.1419,yso-bonsai-en:0.5155 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: reports/en-abo-theses.json |
| hash: md5 |
| md5: 8d526675b9af9b477a94213eb1254841 |
| size: 91 |
| - path: reports/en-jyu-theses.json |
| hash: md5 |
| md5: 623001128bd0cdf0caedcc97457f0b1c |
| size: 92 |
| - path: reports/en-kirjaesittelyt2021.json |
| hash: md5 |
| md5: 0dbf7262d50072866df3943231f3517e |
| size: 92 |
| - path: reports/en-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 6968cecb575018eb88e530ad2997888b |
| size: 94 |
| eval-en@fasttext-en: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| hash: md5 |
| md5: f1883db613d00c7bce08a1211787f984.dir |
| size: 332654375 |
| nfiles: 3831 |
| - path: data/projects/yso-fasttext-en |
| hash: md5 |
| md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir |
| size: 4091197300 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-en: |
| name: YSO fastText English |
| language: en |
| backend: fasttext |
| analyzer: snowball(english) |
| dim: '430' |
| lr: '0.506539' |
| epoch: '115' |
| minn: '4' |
| maxn: '5' |
| minCount: '1' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: reports/fasttext-en-abo-theses.json |
| hash: md5 |
| md5: 931f2f652825441088cac77c30dbf3a9 |
| size: 92 |
| - path: reports/fasttext-en-jyu-theses.json |
| hash: md5 |
| md5: bcad7d3bae638d6008d4898ae93cd52a |
| size: 93 |
| - path: reports/fasttext-en-kirjaesittelyt2021.json |
| hash: md5 |
| md5: 85e8cdaa79beb6cfa897146bb45460b2 |
| size: 93 |
| - path: reports/fasttext-en-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 55ce7777768ebcb9421c06551fa9235d |
| size: 93 |
| eval-fi@fasttext-fi: |
| cmd: |
| - venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| hash: md5 |
| md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir |
| size: 385776340 |
| nfiles: 7267 |
| - path: data/projects/yso-fasttext-fi |
| hash: md5 |
| md5: 06ea14ea97351f72fbaeb13517011c88.dir |
| size: 7547313091 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fasttext-fi: |
| name: YSO fastText Finnish |
| language: fi |
| backend: fasttext |
| analyzer: voikko(fi) |
| dim: '660' |
| lr: '0.506539' |
| epoch: '75' |
| minn: '2' |
| maxn: '7' |
| minCount: '2' |
| wordNgrams: '2' |
| loss: hs |
| limit: '1000' |
| chunksize: '24' |
| vocab: yso |
| transform: limit(15000),filter_lang,limit(5000) |
| access: hidden |
| outs: |
| - path: reports/fasttext-fi-jyu-theses.json |
| hash: md5 |
| md5: f44b29e37e940212365d1faba24d807f |
| size: 94 |
| - path: reports/fasttext-fi-kirjaesittelyt2021.json |
| hash: md5 |
| md5: 8557276f66c15738048271a5890256ea |
| size: 93 |
| - path: reports/fasttext-fi-kirjastonhoitaja.json |
| hash: md5 |
| md5: 1f88e3fc726015d206d9323c3b4efab3 |
| size: 92 |
| - path: reports/fasttext-fi-satakunnan-kansa.json |
| hash: md5 |
| md5: c9e037fb4399ee097b07318090a503fa |
| size: 94 |
| - path: reports/fasttext-fi-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 52c838f448cf38789a5a73e72eb121c5 |
| size: 93 |
| eval-fi@mllm-fi: |
| cmd: |
| - venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| hash: md5 |
| md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir |
| size: 385776340 |
| nfiles: 7267 |
| - path: data/projects/yso-mllm-fi |
| hash: md5 |
| md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir |
| size: 36157873 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-fi: |
| name: YSO MLLM Finnish |
| language: fi |
| backend: mllm |
| analyzer: voikko(fi) |
| vocab: yso |
| limit: '1000' |
| transform: limit(3000000) |
| access: hidden |
| outs: |
| - path: reports/mllm-fi-jyu-theses.json |
| hash: md5 |
| md5: e6a84d414ed22b9d9d3fee4a7d955d85 |
| size: 92 |
| - path: reports/mllm-fi-kirjaesittelyt2021.json |
| hash: md5 |
| md5: 6f2cb724ae31fb838b22731c460974d2 |
| size: 94 |
| - path: reports/mllm-fi-kirjastonhoitaja.json |
| hash: md5 |
| md5: 3247bcd04d35ffb318d7e48666a90976 |
| size: 94 |
| - path: reports/mllm-fi-satakunnan-kansa.json |
| hash: md5 |
| md5: 016830e79dfffc8d5ea99fa523e31ba2 |
| size: 93 |
| - path: reports/mllm-fi-vapaakappaleet-orig.json |
| hash: md5 |
| md5: aa637226b7c53ecea64bca339d20687c |
| size: 93 |
| eval-fi@bonsai-fi: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| hash: md5 |
| md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir |
| size: 385776340 |
| nfiles: 7267 |
| - path: data/projects/yso-bonsai-fi |
| hash: md5 |
| md5: 84a1ae3ca24702d0cbb901f3215e1675.dir |
| size: 5354349482 |
| nfiles: 6 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-fi: |
| name: YSO Omikuji Bonsai Finnish |
| language: fi |
| backend: omikuji |
| analyzer: snowball(finnish) |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '5' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: reports/bonsai-fi-jyu-theses.json |
| hash: md5 |
| md5: 76e3cc2484c71e18d965fdd4acd1ba8a |
| size: 93 |
| - path: reports/bonsai-fi-kirjaesittelyt2021.json |
| hash: md5 |
| md5: f2987884156a29aa053ad510993e74a2 |
| size: 94 |
| - path: reports/bonsai-fi-kirjastonhoitaja.json |
| hash: md5 |
| md5: f12f961721bb62fb1a9e94e5d2636aec |
| size: 92 |
| - path: reports/bonsai-fi-satakunnan-kansa.json |
| hash: md5 |
| md5: ad6ad6e95073021b40aaa643171e5773 |
| size: 93 |
| - path: reports/bonsai-fi-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 5904dff99322d3b6019d9d7a1aa7cd8b |
| size: 93 |
| eval-en@mllm-en: |
| cmd: |
| - venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-abo-theses.json |
| corpora/fulltext-test/en/abo-theses/ |
| - venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-jyu-theses.json |
| corpora/fulltext-test/en/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-kirjaesittelyt2021.json |
| corpora/fulltext-test/en/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-vapaakappaleet-orig.json |
| corpora/fulltext-test/en/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/en |
| hash: md5 |
| md5: f1883db613d00c7bce08a1211787f984.dir |
| size: 332654375 |
| nfiles: 3831 |
| - path: data/projects/yso-mllm-en |
| hash: md5 |
| md5: 129793bd06d231413a66ed5611180dbe.dir |
| size: 39175771 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-en: |
| name: YSO MLLM English |
| language: en |
| backend: mllm |
| analyzer: snowball(english) |
| vocab: yso |
| limit: '1000' |
| transform: limit(2500000) |
| access: hidden |
| outs: |
| - path: reports/mllm-en-abo-theses.json |
| hash: md5 |
| md5: 45509a75f80910070d39b864ba64bdc4 |
| size: 90 |
| - path: reports/mllm-en-jyu-theses.json |
| hash: md5 |
| md5: 0cc98fc0e98ec579ae7acd1bcc1c9cde |
| size: 93 |
| - path: reports/mllm-en-kirjaesittelyt2021.json |
| hash: md5 |
| md5: b2a3cec2dc5e54321c6365324f2f616e |
| size: 92 |
| - path: reports/mllm-en-vapaakappaleet-orig.json |
| hash: md5 |
| md5: c36acbab1e4a06d010ae7ecd8e961406 |
| size: 93 |
| eval-sv@bonsai-sv: |
| cmd: |
| - venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| hash: md5 |
| md5: 8424019ec4c261915627865324ac2f73.dir |
| size: 66737727 |
| nfiles: 1878 |
| - path: data/projects/yso-bonsai-sv |
| hash: md5 |
| md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir |
| size: 1029311381 |
| nfiles: 6 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-bonsai-sv: |
| name: YSO Omikuji Bonsai Swedish |
| language: sv |
| backend: omikuji |
| analyzer: snowball(swedish) |
| vocab: yso |
| cluster_balanced: 'False' |
| cluster_k: '100' |
| max_depth: '3' |
| min_df: '2' |
| ngram: '2' |
| limit: '1000' |
| transform: limit(5000) |
| access: hidden |
| outs: |
| - path: reports/bonsai-sv-abo-theses.json |
| hash: md5 |
| md5: c96c3200961b7e9e12ef94ce0a09623d |
| size: 92 |
| - path: reports/bonsai-sv-jyu-theses.json |
| hash: md5 |
| md5: 6db98665f360001f8647af363fb38ff4 |
| size: 91 |
| - path: reports/bonsai-sv-kirjaesittelyt2021.json |
| hash: md5 |
| md5: 8937047ce1e09f29306cfd2f5d1b0a68 |
| size: 92 |
| - path: reports/bonsai-sv-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 1bf7c202c1be124aa26fa510900020b5 |
| size: 93 |
| eval-sv@mllm-sv: |
| cmd: |
| - venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-abo-theses.json |
| corpora/fulltext-test/sv/abo-theses/ |
| - venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-jyu-theses.json |
| corpora/fulltext-test/sv/jyu-theses/ |
| - venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-kirjaesittelyt2021.json |
| corpora/fulltext-test/sv/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-vapaakappaleet-orig.json |
| corpora/fulltext-test/sv/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/sv |
| hash: md5 |
| md5: 8424019ec4c261915627865324ac2f73.dir |
| size: 66737727 |
| nfiles: 1878 |
| - path: data/projects/yso-mllm-sv |
| hash: md5 |
| md5: 88dd945c235bdfc4549fa44f5ea582a4.dir |
| size: 19736546 |
| nfiles: 2 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-mllm-sv: |
| name: YSO MLLM Swedish |
| language: sv |
| backend: mllm |
| analyzer: snowball(swedish) |
| vocab: yso |
| limit: '1000' |
| transform: limit(3000000) |
| access: hidden |
| outs: |
| - path: reports/mllm-sv-abo-theses.json |
| hash: md5 |
| md5: 7b95bec0ae5f0a4346b5ded0c7adfab8 |
| size: 92 |
| - path: reports/mllm-sv-jyu-theses.json |
| hash: md5 |
| md5: c6f2150aebf21e4ddb8d1ebade0aefa5 |
| size: 91 |
| - path: reports/mllm-sv-kirjaesittelyt2021.json |
| hash: md5 |
| md5: ddddbc0b9097fea43c34b5133e2b24ae |
| size: 93 |
| - path: reports/mllm-sv-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 7511ffde5122627168bb00855fe0e4e6 |
| size: 92 |
| eval-fi@fi: |
| cmd: |
| - venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-jyu-theses.json |
| corpora/fulltext-test/fi/jyu-theses/ |
| - venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-kirjaesittelyt2021.json |
| corpora/fulltext-test/fi/kirjaesittelyt2021/ |
| - venv/bin/annif eval yso-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fi-kirjastonhoitaja.json |
| corpora/fulltext-test/fi/kirjastonhoitaja/ |
| - venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-satakunnan-kansa.json |
| corpora/fulltext-test/fi/satakunnan-kansa-?/ |
| - venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-vapaakappaleet-orig.json |
| corpora/fulltext-test/fi/vapaakappaleet-orig/ |
| deps: |
| - path: corpora/fulltext-test/fi |
| hash: md5 |
| md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir |
| size: 385776340 |
| nfiles: 7267 |
| - path: data/projects/yso-fi |
| hash: md5 |
| md5: a0961bd4dfcfee8367b22ad17d780dbb.dir |
| size: 1259460957 |
| nfiles: 3 |
| - path: venv-installed |
| hash: md5 |
| md5: 829dcd06bff073c054d28bb526b8b16f |
| size: 48 |
| params: |
| projects.toml: |
| yso-fi: |
| name: YSO suomi (2023.1.Ghosha) |
| language: fi |
| backend: nn_ensemble |
| sources: yso-mllm-fi:0.1492,yso-fasttext-fi:0.6090,yso-bonsai-fi:0.2418 |
| limit: '100' |
| vocab: yso |
| nodes: '100' |
| dropout_rate: '0.2' |
| epochs: '10' |
| outs: |
| - path: reports/fi-jyu-theses.json |
| hash: md5 |
| md5: f60303e36e729af9a6ba5fea084b440e |
| size: 91 |
| - path: reports/fi-kirjaesittelyt2021.json |
| hash: md5 |
| md5: e9ee9149f010d08d85bf4869644e25b8 |
| size: 94 |
| - path: reports/fi-kirjastonhoitaja.json |
| hash: md5 |
| md5: dec9471c35e4fef43ab484c4fd6ab0b3 |
| size: 91 |
| - path: reports/fi-satakunnan-kansa.json |
| hash: md5 |
| md5: 73c32b1eae7f182e1204f65b13022929 |
| size: 93 |
| - path: reports/fi-vapaakappaleet-orig.json |
| hash: md5 |
| md5: 50921e6a32b8d391ccfd01d55cbd23be |
| size: 94 |
|
|