FintoAI-data-YSO / dvc.lock
juhoinkinen's picture
Upload folder using huggingface_hub
3d570df verified
raw
history blame
53.2 kB
schema: '2.0'
stages:
install:
cmd:
- python3 -m venv venv
- source venv/bin/activate && pip install -U pip wheel setuptools && pip install
-r requirements.txt
- cp requirements.txt venv-installed
deps:
- path: requirements.txt
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
outs:
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
load-vocab:
cmd: venv/bin/annif load-vocab --force yso corpora/yso-skos.ttl
deps:
- path: corpora/yso-skos.ttl
hash: md5
md5: c3d9a5148c46efa4fbf11ee866154ebf
size: 32953533
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
outs:
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
train-mllm@fi:
cmd: venv/bin/annif train yso-mllm-fi corpora/fulltext-train/fi/*/ -j 16
deps:
- path: corpora/fulltext-train/fi
hash: md5
md5: f5c1820afb398fa8145181cf22905336.dir
size: 413860133
nfiles: 5583
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-fi:
name: YSO MLLM Finnish
language: fi
backend: mllm
analyzer: voikko(fi)
vocab: yso
limit: '1000'
transform: limit(3000000)
access: hidden
outs:
- path: data/projects/yso-mllm-fi
hash: md5
md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir
size: 36157873
nfiles: 2
train-mllm@en:
cmd: venv/bin/annif train yso-mllm-en corpora/fulltext-train/en/*/ -j 16
deps:
- path: corpora/fulltext-train/en
hash: md5
md5: 426f141f77c5bac77b32784e4b827d31.dir
size: 268351812
nfiles: 4584
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-en:
name: YSO MLLM English
language: en
backend: mllm
analyzer: snowball(english)
vocab: yso
limit: '1000'
transform: limit(2500000)
access: hidden
outs:
- path: data/projects/yso-mllm-en
hash: md5
md5: 129793bd06d231413a66ed5611180dbe.dir
size: 39175771
nfiles: 2
train-mllm@sv:
cmd: venv/bin/annif train yso-mllm-sv corpora/fulltext-train/sv/*/ -j 16
deps:
- path: corpora/fulltext-train/sv
hash: md5
md5: c64480b5f34b1895db972d774abe12cc.dir
size: 155098642
nfiles: 3754
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-sv:
name: YSO MLLM Swedish
language: sv
backend: mllm
analyzer: snowball(swedish)
vocab: yso
limit: '1000'
transform: limit(3000000)
access: hidden
outs:
- path: data/projects/yso-mllm-sv
hash: md5
md5: 88dd945c235bdfc4549fa44f5ea582a4.dir
size: 19736546
nfiles: 2
train-omikuji@sv:
cmd: venv/bin/annif train yso-bonsai-sv corpora/shorttext-train/sv/yso-finna-sv*.tsv.gz
deps:
- path: corpora/shorttext-train/sv/
hash: md5
md5: 33a49e42ec12daf0c973c792928a2cb0.dir
size: 40515364
nfiles: 3
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-sv:
name: YSO Omikuji Bonsai Swedish
language: sv
backend: omikuji
analyzer: snowball(swedish)
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '2'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: data/projects/yso-bonsai-sv
hash: md5
md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir
size: 1029311381
nfiles: 6
train-omikuji@en:
cmd: venv/bin/annif train yso-bonsai-en corpora/shorttext-train/en/yso-finna-en*.tsv.gz
deps:
- path: corpora/shorttext-train/en/
hash: md5
md5: 19e76af78210f39cf02a5c8bbee38f60.dir
size: 98829194
nfiles: 3
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-en:
name: YSO Omikuji Bonsai English
language: en
backend: omikuji
analyzer: simple
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '5'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: data/projects/yso-bonsai-en
hash: md5
md5: b1a5a925fbe1a11c153ce3133176c717.dir
size: 2321603849
nfiles: 6
train-omikuji@fi:
cmd: venv/bin/annif train yso-bonsai-fi corpora/shorttext-train/fi/yso-finna-fi*.tsv.gz
deps:
- path: corpora/shorttext-train/fi/
hash: md5
md5: 5a2f47124433a7215e6c391a48c0aeca.dir
size: 358561386
nfiles: 9
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-fi:
name: YSO Omikuji Bonsai Finnish
language: fi
backend: omikuji
analyzer: snowball(finnish)
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '5'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: data/projects/yso-bonsai-fi
hash: md5
md5: 84a1ae3ca24702d0cbb901f3215e1675.dir
size: 5354349482
nfiles: 6
train-fasttext@sv:
cmd: venv/bin/annif train yso-fasttext-sv corpora/shorttext-train/sv/yso-finna-sv*.tsv.gz
deps:
- path: corpora/shorttext-train/sv/
hash: md5
md5: 33a49e42ec12daf0c973c792928a2cb0.dir
size: 40515364
nfiles: 3
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-sv:
name: YSO fastText Swedish
language: sv
backend: fasttext
analyzer: snowball(swedish)
dim: '560'
lr: '0.974349'
epoch: '110'
minn: '2'
maxn: '6'
minCount: '2'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: data/projects/yso-fasttext-sv
hash: md5
md5: eccbf5ba8ce07f2777b4bee583ed783b.dir
size: 4913883852
nfiles: 2
train-fasttext@fi:
cmd: venv/bin/annif train yso-fasttext-fi corpora/shorttext-train/fi/yso-finna-fi*.tsv.gz
deps:
- path: corpora/shorttext-train/fi/
hash: md5
md5: 5a2f47124433a7215e6c391a48c0aeca.dir
size: 358561386
nfiles: 9
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-fi:
name: YSO fastText Finnish
language: fi
backend: fasttext
analyzer: voikko(fi)
dim: '660'
lr: '0.506539'
epoch: '75'
minn: '2'
maxn: '7'
minCount: '2'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: data/projects/yso-fasttext-fi
hash: md5
md5: 06ea14ea97351f72fbaeb13517011c88.dir
size: 7547313091
nfiles: 2
train-fasttext@en:
cmd: venv/bin/annif train yso-fasttext-en corpora/shorttext-train/en/yso-finna-en*.tsv.gz
deps:
- path: corpora/shorttext-train/en/
hash: md5
md5: 19e76af78210f39cf02a5c8bbee38f60.dir
size: 98829194
nfiles: 3
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-en:
name: YSO fastText English
language: en
backend: fasttext
analyzer: snowball(english)
dim: '430'
lr: '0.506539'
epoch: '115'
minn: '4'
maxn: '5'
minCount: '1'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: data/projects/yso-fasttext-en
hash: md5
md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir
size: 4091197300
nfiles: 2
train-nn-ensemble@sv:
cmd: venv/bin/annif train yso-sv corpora/fulltext-train/sv/*/ -j 16
deps:
- path: corpora/fulltext-train/sv
hash: md5
md5: c64480b5f34b1895db972d774abe12cc.dir
size: 155098642
nfiles: 3754
- path: data/projects/yso-bonsai-sv
hash: md5
md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir
size: 1029311381
nfiles: 6
- path: data/projects/yso-fasttext-sv
hash: md5
md5: eccbf5ba8ce07f2777b4bee583ed783b.dir
size: 4913883852
nfiles: 2
- path: data/projects/yso-mllm-sv
hash: md5
md5: 88dd945c235bdfc4549fa44f5ea582a4.dir
size: 19736546
nfiles: 2
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-sv:
name: ALLFO svenska (2023.1.Ghosha)
language: sv
backend: nn_ensemble
sources: yso-mllm-sv:0.1439,yso-fasttext-sv:0.3302,yso-bonsai-sv:0.5259
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: data/projects/yso-sv
hash: md5
md5: 9cab94546614acf5ac28514687cf26ad.dir
size: 1259460957
nfiles: 3
train-nn-ensemble@en:
cmd: venv/bin/annif train yso-en corpora/fulltext-train/en/*/ -j 16
deps:
- path: corpora/fulltext-train/en
hash: md5
md5: 426f141f77c5bac77b32784e4b827d31.dir
size: 268351812
nfiles: 4584
- path: data/projects/yso-bonsai-en
hash: md5
md5: b1a5a925fbe1a11c153ce3133176c717.dir
size: 2321603849
nfiles: 6
- path: data/projects/yso-fasttext-en
hash: md5
md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir
size: 4091197300
nfiles: 2
- path: data/projects/yso-mllm-en
hash: md5
md5: 129793bd06d231413a66ed5611180dbe.dir
size: 39175771
nfiles: 2
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-en:
name: YSO English (2023.1.Ghosha)
language: en
backend: nn_ensemble
sources: yso-mllm-en:0.3426,yso-fasttext-en:0.1419,yso-bonsai-en:0.5155
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: data/projects/yso-en
hash: md5
md5: cf09853815a7e8c621b352c2bc70f9e1.dir
size: 1259460957
nfiles: 3
train-nn-ensemble@fi:
cmd: venv/bin/annif train yso-fi corpora/fulltext-train/fi/*/ -j 16
deps:
- path: corpora/fulltext-train/fi
hash: md5
md5: f5c1820afb398fa8145181cf22905336.dir
size: 413860133
nfiles: 5583
- path: data/projects/yso-bonsai-fi
hash: md5
md5: 84a1ae3ca24702d0cbb901f3215e1675.dir
size: 5354349482
nfiles: 6
- path: data/projects/yso-fasttext-fi
hash: md5
md5: 06ea14ea97351f72fbaeb13517011c88.dir
size: 7547313091
nfiles: 2
- path: data/projects/yso-mllm-fi
hash: md5
md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir
size: 36157873
nfiles: 2
- path: data/vocabs/yso
hash: md5
md5: ad8a2ea707ceac4f455d2b3b2f7a7c90.dir
size: 61626265
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fi:
name: YSO suomi (2023.1.Ghosha)
language: fi
backend: nn_ensemble
sources: yso-mllm-fi:0.1492,yso-fasttext-fi:0.6090,yso-bonsai-fi:0.2418
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: data/projects/yso-fi
hash: md5
md5: a0961bd4dfcfee8367b22ad17d780dbb.dir
size: 1259460957
nfiles: 3
eval-fi@mllm:
cmd:
- venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-mllm-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-mllm-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
md5: 169cae26a0b93733aed807f8e9d9ca40.dir
size: 385626157
nfiles: 7267
- path: data/projects/yso-mllm-fi
md5: c42dd794bb146ecf7cf60e5f49167ab2.dir
size: 36078206
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/mllm-fi-jyu-theses.json
md5: 017241308807e76f5c4ecc675c280bfa
size: 92
- path: reports/mllm-fi-kirjaesittelyt2021.json
md5: b289df9d0158a0e8c6cd74dd2df7d9c2
size: 94
- path: reports/mllm-fi-kirjastonhoitaja.json
md5: 1ed10e3905a72ffe5181bc74ec27599d
size: 93
- path: reports/mllm-fi-satakunnan-kansa.json
md5: 7100b02bb18dad3f8e8d073b7bff9e50
size: 93
- path: reports/mllm-fi-vapaakappaleet-orig.json
md5: bb3ed842b8468708c1c064452b6b24f1
size: 93
eval-fi@bonsai:
cmd:
- venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-bonsai-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-bonsai-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
md5: 169cae26a0b93733aed807f8e9d9ca40.dir
size: 385626157
nfiles: 7267
- path: data/projects/yso-bonsai-fi
md5: e065094c56ff3a61e2a4648e47156c8f.dir
size: 5277448954
nfiles: 6
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/bonsai-fi-jyu-theses.json
md5: 660095be4380591aa8c7f839ffbf1aa4
size: 92
- path: reports/bonsai-fi-kirjaesittelyt2021.json
md5: 61540b3360540911b4ec3a934abb74ca
size: 94
- path: reports/bonsai-fi-kirjastonhoitaja.json
md5: 95696330bd44571520752bdb4a460287
size: 93
- path: reports/bonsai-fi-satakunnan-kansa.json
md5: 2bb5923811b0760695cb5ad2465469e5
size: 93
- path: reports/bonsai-fi-vapaakappaleet-orig.json
md5: d369966bf45650f6b9643469343ab65d
size: 93
eval-fi@fasttext:
cmd:
- venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-fasttext-fi -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-fasttext-fi -j 4 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
md5: 169cae26a0b93733aed807f8e9d9ca40.dir
size: 385626157
nfiles: 7267
- path: data/projects/yso-fasttext-fi
md5: 0f5c9eb966671d610e8d8556b270652c.dir
size: 7519964475
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/fasttext-fi-jyu-theses.json
md5: 339a05b2faa098074a7717ef6677048b
size: 93
- path: reports/fasttext-fi-kirjaesittelyt2021.json
md5: b0425400c2513d8580bf726bc0b487d3
size: 94
- path: reports/fasttext-fi-kirjastonhoitaja.json
md5: 7fe1e06726e05effef46a1cb97078d72
size: 92
- path: reports/fasttext-fi-satakunnan-kansa.json
md5: 63b1b03ad124c17bc3570188f1855ec9
size: 93
- path: reports/fasttext-fi-vapaakappaleet-orig.json
md5: 0acd6e385633742bd18513109a5a87dc
size: 94
eval-sv@mllm:
cmd:
- venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-mllm-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
md5: 2d29064639aa0f8900e4bf58a781826e.dir
size: 66510456
nfiles: 1876
- path: data/projects/yso-mllm-sv
md5: fc2e344bac4abd0048c7c286ffdba0eb.dir
size: 19904278
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/mllm-sv-abo-theses.json
md5: 0d6f871b92dfb263c56e17f644e99513
size: 92
- path: reports/mllm-sv-jyu-theses.json
md5: 81f8eb0a6908169aeb2ba236a0baf3f7
size: 91
- path: reports/mllm-sv-kirjaesittelyt2021.json
md5: 0886460a79805dd7698d6c7f60f2ab2d
size: 93
- path: reports/mllm-sv-vapaakappaleet-orig.json
md5: d55b237633198997361b7aab0ba7d10b
size: 92
eval-sv@bonsai:
cmd:
- venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-bonsai-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
md5: 2d29064639aa0f8900e4bf58a781826e.dir
size: 66510456
nfiles: 1876
- path: data/projects/yso-bonsai-sv
md5: 325628ed10b5330b42fdf93489ab870d.dir
size: 1035735788
nfiles: 6
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/bonsai-sv-abo-theses.json
md5: 65fd5a684e967d01b8527489b6ca38c3
size: 93
- path: reports/bonsai-sv-jyu-theses.json
md5: 69084213079115f7f97cf16115d58010
size: 91
- path: reports/bonsai-sv-kirjaesittelyt2021.json
md5: 1c2fe10783ce03e22dede97beb120c65
size: 93
- path: reports/bonsai-sv-vapaakappaleet-orig.json
md5: f6e85d73d66e9f6cfc0cfa359ba6de73
size: 92
eval-sv@fasttext:
cmd:
- venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-fasttext-sv -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
md5: 2d29064639aa0f8900e4bf58a781826e.dir
size: 66510456
nfiles: 1876
- path: data/projects/yso-fasttext-sv
md5: 8d272558b0ec13606c0027af74be47e8.dir
size: 4915577395
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/fasttext-sv-abo-theses.json
md5: ea4e5c8d3c72e853ce0bda1757cd4e41
size: 93
- path: reports/fasttext-sv-jyu-theses.json
md5: 8e73efa922698dd1fa968f6dba4c779c
size: 92
- path: reports/fasttext-sv-kirjaesittelyt2021.json
md5: 42fa4ff3fa951ecf07ce404a1c68d990
size: 93
- path: reports/fasttext-sv-vapaakappaleet-orig.json
md5: c8c0290553be948bbd127572efc0fa95
size: 93
eval-en@fasttext:
cmd:
- venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-fasttext-en -j 6 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
md5: e6fd23c87a07631f24e52f568fad23ea.dir
size: 331772939
nfiles: 3825
- path: data/projects/yso-fasttext-en
md5: 625be5253a0eee61c44741e63acb1021.dir
size: 4077282108
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/fasttext-en-abo-theses.json
md5: 66940ff0ea42d44ab1b429905c51858a
size: 93
- path: reports/fasttext-en-jyu-theses.json
md5: 8bdfe2503c1a002d31121f48daf493db
size: 92
- path: reports/fasttext-en-kirjaesittelyt2021.json
md5: 1d8d4ce09c768c644d34382497b5bb15
size: 92
- path: reports/fasttext-en-vapaakappaleet-orig.json
md5: a8060ee8a76b5b3e7a5134d0edfdc5d9
size: 92
eval-en@mllm:
cmd:
- venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-mllm-en -j 6 -m F1@5 -m NDCG --metrics-file reports/mllm-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
md5: e6fd23c87a07631f24e52f568fad23ea.dir
size: 331772939
nfiles: 3825
- path: data/projects/yso-mllm-en
md5: 03fe928341b019387a004ce33b85b220.dir
size: 38643312
nfiles: 2
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/mllm-en-abo-theses.json
md5: 0cc714a7ee3c0a5d42f941da5719dd8c
size: 92
- path: reports/mllm-en-jyu-theses.json
md5: c8bdc5f4f57799201b97ed62191c608c
size: 92
- path: reports/mllm-en-kirjaesittelyt2021.json
md5: 121d0ac40869b4dd7b0a1fc6094e2c42
size: 94
- path: reports/mllm-en-vapaakappaleet-orig.json
md5: e763f75300be26b3699d40a4cc119526
size: 94
eval-en@bonsai:
cmd:
- venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-bonsai-en -j 6 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
md5: e6fd23c87a07631f24e52f568fad23ea.dir
size: 331772939
nfiles: 3825
- path: data/projects/yso-bonsai-en
md5: 7b3c8486bbce25710e969ce706e9ac71.dir
size: 2246619174
nfiles: 6
- path: venv-installed
md5: abf841d22e1cdf25eb9e9ef2368c240d
size: 49
outs:
- path: reports/bonsai-en-abo-theses.json
md5: 109beeb488b023241e61fb4fddba1d35
size: 93
- path: reports/bonsai-en-jyu-theses.json
md5: f917141ae8af29e6834359c35998e98d
size: 92
- path: reports/bonsai-en-kirjaesittelyt2021.json
md5: 711601791fe7245a961e0bd5fdb6dccd
size: 92
- path: reports/bonsai-en-vapaakappaleet-orig.json
md5: 2324260b996366b775fbf3c38a14a309
size: 93
eval-sv@fasttext-sv:
cmd:
- venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-fasttext-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
hash: md5
md5: 8424019ec4c261915627865324ac2f73.dir
size: 66737727
nfiles: 1878
- path: data/projects/yso-fasttext-sv
hash: md5
md5: eccbf5ba8ce07f2777b4bee583ed783b.dir
size: 4913883852
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-sv:
name: YSO fastText Swedish
language: sv
backend: fasttext
analyzer: snowball(swedish)
dim: '560'
lr: '0.974349'
epoch: '110'
minn: '2'
maxn: '6'
minCount: '2'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: reports/fasttext-sv-abo-theses.json
hash: md5
md5: 50e4860a0829e104eb2051c13b7786c2
size: 94
- path: reports/fasttext-sv-jyu-theses.json
hash: md5
md5: 194c5356f9792f2d6938c26ffc935f37
size: 91
- path: reports/fasttext-sv-kirjaesittelyt2021.json
hash: md5
md5: c49bb0271033d10d951ccce116a0c0c4
size: 93
- path: reports/fasttext-sv-vapaakappaleet-orig.json
hash: md5
md5: a85d38c085eac4297435d392077cb7f6
size: 94
eval-sv@sv:
cmd:
- venv/bin/annif eval yso-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
hash: md5
md5: 8424019ec4c261915627865324ac2f73.dir
size: 66737727
nfiles: 1878
- path: data/projects/yso-sv
hash: md5
md5: 9cab94546614acf5ac28514687cf26ad.dir
size: 1259460957
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-sv:
name: ALLFO svenska (2023.1.Ghosha)
language: sv
backend: nn_ensemble
sources: yso-mllm-sv:0.1439,yso-fasttext-sv:0.3302,yso-bonsai-sv:0.5259
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: reports/sv-abo-theses.json
hash: md5
md5: 87917904500a6eefe80e1924a0cebe99
size: 92
- path: reports/sv-jyu-theses.json
hash: md5
md5: bb440b30d4c995bc9d9ffef140b53817
size: 91
- path: reports/sv-kirjaesittelyt2021.json
hash: md5
md5: e9b35e7f38fa85945eb1278bdf01ed30
size: 92
- path: reports/sv-vapaakappaleet-orig.json
hash: md5
md5: dbd8e93725f32ba3fcba848e9cb61876
size: 92
eval-en@bonsai-en:
cmd:
- venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-bonsai-en -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-en -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
hash: md5
md5: f1883db613d00c7bce08a1211787f984.dir
size: 332654375
nfiles: 3831
- path: data/projects/yso-bonsai-en
hash: md5
md5: b1a5a925fbe1a11c153ce3133176c717.dir
size: 2321603849
nfiles: 6
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-en:
name: YSO Omikuji Bonsai English
language: en
backend: omikuji
analyzer: simple
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '5'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: reports/bonsai-en-abo-theses.json
hash: md5
md5: 23fb86f0954118ead50392c7801ce834
size: 91
- path: reports/bonsai-en-jyu-theses.json
hash: md5
md5: 0f8b8c4db657ed8aac3b494a98fa6c15
size: 92
- path: reports/bonsai-en-kirjaesittelyt2021.json
hash: md5
md5: f047174bbd0de4588072412f2431d288
size: 92
- path: reports/bonsai-en-vapaakappaleet-orig.json
hash: md5
md5: 3091f929ce7e0b070d7fc2dfd0fff4c0
size: 92
eval-en@en:
cmd:
- venv/bin/annif eval yso-en -j 1 -m F1@5 -m NDCG --metrics-file reports/en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-en -j 1 -m F1@5 -m NDCG --metrics-file reports/en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-en -j 10 -m F1@5 -m NDCG --metrics-file reports/en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-en -j 10 -m F1@5 -m NDCG --metrics-file reports/en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
hash: md5
md5: f1883db613d00c7bce08a1211787f984.dir
size: 332654375
nfiles: 3831
- path: data/projects/yso-en
hash: md5
md5: cf09853815a7e8c621b352c2bc70f9e1.dir
size: 1259460957
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-en:
name: YSO English (2023.1.Ghosha)
language: en
backend: nn_ensemble
sources: yso-mllm-en:0.3426,yso-fasttext-en:0.1419,yso-bonsai-en:0.5155
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: reports/en-abo-theses.json
hash: md5
md5: 8d526675b9af9b477a94213eb1254841
size: 91
- path: reports/en-jyu-theses.json
hash: md5
md5: 623001128bd0cdf0caedcc97457f0b1c
size: 92
- path: reports/en-kirjaesittelyt2021.json
hash: md5
md5: 0dbf7262d50072866df3943231f3517e
size: 92
- path: reports/en-vapaakappaleet-orig.json
hash: md5
md5: 6968cecb575018eb88e530ad2997888b
size: 94
eval-en@fasttext-en:
cmd:
- venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-fasttext-en -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-en -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
hash: md5
md5: f1883db613d00c7bce08a1211787f984.dir
size: 332654375
nfiles: 3831
- path: data/projects/yso-fasttext-en
hash: md5
md5: ca3fa22f9a2b6d46246e351c7a1e3256.dir
size: 4091197300
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-en:
name: YSO fastText English
language: en
backend: fasttext
analyzer: snowball(english)
dim: '430'
lr: '0.506539'
epoch: '115'
minn: '4'
maxn: '5'
minCount: '1'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: reports/fasttext-en-abo-theses.json
hash: md5
md5: 931f2f652825441088cac77c30dbf3a9
size: 92
- path: reports/fasttext-en-jyu-theses.json
hash: md5
md5: bcad7d3bae638d6008d4898ae93cd52a
size: 93
- path: reports/fasttext-en-kirjaesittelyt2021.json
hash: md5
md5: 85e8cdaa79beb6cfa897146bb45460b2
size: 93
- path: reports/fasttext-en-vapaakappaleet-orig.json
hash: md5
md5: 55ce7777768ebcb9421c06551fa9235d
size: 93
eval-fi@fasttext-fi:
cmd:
- venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-fasttext-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-fasttext-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fasttext-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
hash: md5
md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir
size: 385776340
nfiles: 7267
- path: data/projects/yso-fasttext-fi
hash: md5
md5: 06ea14ea97351f72fbaeb13517011c88.dir
size: 7547313091
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fasttext-fi:
name: YSO fastText Finnish
language: fi
backend: fasttext
analyzer: voikko(fi)
dim: '660'
lr: '0.506539'
epoch: '75'
minn: '2'
maxn: '7'
minCount: '2'
wordNgrams: '2'
loss: hs
limit: '1000'
chunksize: '24'
vocab: yso
transform: limit(15000),filter_lang,limit(5000)
access: hidden
outs:
- path: reports/fasttext-fi-jyu-theses.json
hash: md5
md5: f44b29e37e940212365d1faba24d807f
size: 94
- path: reports/fasttext-fi-kirjaesittelyt2021.json
hash: md5
md5: 8557276f66c15738048271a5890256ea
size: 93
- path: reports/fasttext-fi-kirjastonhoitaja.json
hash: md5
md5: 1f88e3fc726015d206d9323c3b4efab3
size: 92
- path: reports/fasttext-fi-satakunnan-kansa.json
hash: md5
md5: c9e037fb4399ee097b07318090a503fa
size: 94
- path: reports/fasttext-fi-vapaakappaleet-orig.json
hash: md5
md5: 52c838f448cf38789a5a73e72eb121c5
size: 93
eval-fi@mllm-fi:
cmd:
- venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-mllm-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
hash: md5
md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir
size: 385776340
nfiles: 7267
- path: data/projects/yso-mllm-fi
hash: md5
md5: fb05a7c1e6b2ed72fee85fbbc5b7374b.dir
size: 36157873
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-fi:
name: YSO MLLM Finnish
language: fi
backend: mllm
analyzer: voikko(fi)
vocab: yso
limit: '1000'
transform: limit(3000000)
access: hidden
outs:
- path: reports/mllm-fi-jyu-theses.json
hash: md5
md5: e6a84d414ed22b9d9d3fee4a7d955d85
size: 92
- path: reports/mllm-fi-kirjaesittelyt2021.json
hash: md5
md5: 6f2cb724ae31fb838b22731c460974d2
size: 94
- path: reports/mllm-fi-kirjastonhoitaja.json
hash: md5
md5: 3247bcd04d35ffb318d7e48666a90976
size: 94
- path: reports/mllm-fi-satakunnan-kansa.json
hash: md5
md5: 016830e79dfffc8d5ea99fa523e31ba2
size: 93
- path: reports/mllm-fi-vapaakappaleet-orig.json
hash: md5
md5: aa637226b7c53ecea64bca339d20687c
size: 93
eval-fi@bonsai-fi:
cmd:
- venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-bonsai-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
hash: md5
md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir
size: 385776340
nfiles: 7267
- path: data/projects/yso-bonsai-fi
hash: md5
md5: 84a1ae3ca24702d0cbb901f3215e1675.dir
size: 5354349482
nfiles: 6
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-fi:
name: YSO Omikuji Bonsai Finnish
language: fi
backend: omikuji
analyzer: snowball(finnish)
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '5'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: reports/bonsai-fi-jyu-theses.json
hash: md5
md5: 76e3cc2484c71e18d965fdd4acd1ba8a
size: 93
- path: reports/bonsai-fi-kirjaesittelyt2021.json
hash: md5
md5: f2987884156a29aa053ad510993e74a2
size: 94
- path: reports/bonsai-fi-kirjastonhoitaja.json
hash: md5
md5: f12f961721bb62fb1a9e94e5d2636aec
size: 92
- path: reports/bonsai-fi-satakunnan-kansa.json
hash: md5
md5: ad6ad6e95073021b40aaa643171e5773
size: 93
- path: reports/bonsai-fi-vapaakappaleet-orig.json
hash: md5
md5: 5904dff99322d3b6019d9d7a1aa7cd8b
size: 93
eval-en@mllm-en:
cmd:
- venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-abo-theses.json
corpora/fulltext-test/en/abo-theses/
- venv/bin/annif eval yso-mllm-en -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-en-jyu-theses.json
corpora/fulltext-test/en/jyu-theses/
- venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-kirjaesittelyt2021.json
corpora/fulltext-test/en/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-en -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-en-vapaakappaleet-orig.json
corpora/fulltext-test/en/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/en
hash: md5
md5: f1883db613d00c7bce08a1211787f984.dir
size: 332654375
nfiles: 3831
- path: data/projects/yso-mllm-en
hash: md5
md5: 129793bd06d231413a66ed5611180dbe.dir
size: 39175771
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-en:
name: YSO MLLM English
language: en
backend: mllm
analyzer: snowball(english)
vocab: yso
limit: '1000'
transform: limit(2500000)
access: hidden
outs:
- path: reports/mllm-en-abo-theses.json
hash: md5
md5: 45509a75f80910070d39b864ba64bdc4
size: 90
- path: reports/mllm-en-jyu-theses.json
hash: md5
md5: 0cc98fc0e98ec579ae7acd1bcc1c9cde
size: 93
- path: reports/mllm-en-kirjaesittelyt2021.json
hash: md5
md5: b2a3cec2dc5e54321c6365324f2f616e
size: 92
- path: reports/mllm-en-vapaakappaleet-orig.json
hash: md5
md5: c36acbab1e4a06d010ae7ecd8e961406
size: 93
eval-sv@bonsai-sv:
cmd:
- venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-bonsai-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-bonsai-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/bonsai-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
hash: md5
md5: 8424019ec4c261915627865324ac2f73.dir
size: 66737727
nfiles: 1878
- path: data/projects/yso-bonsai-sv
hash: md5
md5: 77a54001b5d18a7a474bcf7d4e9577e1.dir
size: 1029311381
nfiles: 6
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-bonsai-sv:
name: YSO Omikuji Bonsai Swedish
language: sv
backend: omikuji
analyzer: snowball(swedish)
vocab: yso
cluster_balanced: 'False'
cluster_k: '100'
max_depth: '3'
min_df: '2'
ngram: '2'
limit: '1000'
transform: limit(5000)
access: hidden
outs:
- path: reports/bonsai-sv-abo-theses.json
hash: md5
md5: c96c3200961b7e9e12ef94ce0a09623d
size: 92
- path: reports/bonsai-sv-jyu-theses.json
hash: md5
md5: 6db98665f360001f8647af363fb38ff4
size: 91
- path: reports/bonsai-sv-kirjaesittelyt2021.json
hash: md5
md5: 8937047ce1e09f29306cfd2f5d1b0a68
size: 92
- path: reports/bonsai-sv-vapaakappaleet-orig.json
hash: md5
md5: 1bf7c202c1be124aa26fa510900020b5
size: 93
eval-sv@mllm-sv:
cmd:
- venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-abo-theses.json
corpora/fulltext-test/sv/abo-theses/
- venv/bin/annif eval yso-mllm-sv -j 1 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-jyu-theses.json
corpora/fulltext-test/sv/jyu-theses/
- venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-kirjaesittelyt2021.json
corpora/fulltext-test/sv/kirjaesittelyt2021/
- venv/bin/annif eval yso-mllm-sv -j 10 -m F1@5 -m NDCG --metrics-file reports/mllm-sv-vapaakappaleet-orig.json
corpora/fulltext-test/sv/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/sv
hash: md5
md5: 8424019ec4c261915627865324ac2f73.dir
size: 66737727
nfiles: 1878
- path: data/projects/yso-mllm-sv
hash: md5
md5: 88dd945c235bdfc4549fa44f5ea582a4.dir
size: 19736546
nfiles: 2
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-mllm-sv:
name: YSO MLLM Swedish
language: sv
backend: mllm
analyzer: snowball(swedish)
vocab: yso
limit: '1000'
transform: limit(3000000)
access: hidden
outs:
- path: reports/mllm-sv-abo-theses.json
hash: md5
md5: 7b95bec0ae5f0a4346b5ded0c7adfab8
size: 92
- path: reports/mllm-sv-jyu-theses.json
hash: md5
md5: c6f2150aebf21e4ddb8d1ebade0aefa5
size: 91
- path: reports/mllm-sv-kirjaesittelyt2021.json
hash: md5
md5: ddddbc0b9097fea43c34b5133e2b24ae
size: 93
- path: reports/mllm-sv-vapaakappaleet-orig.json
hash: md5
md5: 7511ffde5122627168bb00855fe0e4e6
size: 92
eval-fi@fi:
cmd:
- venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-jyu-theses.json
corpora/fulltext-test/fi/jyu-theses/
- venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-kirjaesittelyt2021.json
corpora/fulltext-test/fi/kirjaesittelyt2021/
- venv/bin/annif eval yso-fi -j 1 -m F1@5 -m NDCG --metrics-file reports/fi-kirjastonhoitaja.json
corpora/fulltext-test/fi/kirjastonhoitaja/
- venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-satakunnan-kansa.json
corpora/fulltext-test/fi/satakunnan-kansa-?/
- venv/bin/annif eval yso-fi -j 10 -m F1@5 -m NDCG --metrics-file reports/fi-vapaakappaleet-orig.json
corpora/fulltext-test/fi/vapaakappaleet-orig/
deps:
- path: corpora/fulltext-test/fi
hash: md5
md5: 0fe8c48afc24e8c843a78fd6d76609a4.dir
size: 385776340
nfiles: 7267
- path: data/projects/yso-fi
hash: md5
md5: a0961bd4dfcfee8367b22ad17d780dbb.dir
size: 1259460957
nfiles: 3
- path: venv-installed
hash: md5
md5: 829dcd06bff073c054d28bb526b8b16f
size: 48
params:
projects.toml:
yso-fi:
name: YSO suomi (2023.1.Ghosha)
language: fi
backend: nn_ensemble
sources: yso-mllm-fi:0.1492,yso-fasttext-fi:0.6090,yso-bonsai-fi:0.2418
limit: '100'
vocab: yso
nodes: '100'
dropout_rate: '0.2'
epochs: '10'
outs:
- path: reports/fi-jyu-theses.json
hash: md5
md5: f60303e36e729af9a6ba5fea084b440e
size: 91
- path: reports/fi-kirjaesittelyt2021.json
hash: md5
md5: e9ee9149f010d08d85bf4869644e25b8
size: 94
- path: reports/fi-kirjastonhoitaja.json
hash: md5
md5: dec9471c35e4fef43ab484c4fd6ab0b3
size: 91
- path: reports/fi-satakunnan-kansa.json
hash: md5
md5: 73c32b1eae7f182e1204f65b13022929
size: 93
- path: reports/fi-vapaakappaleet-orig.json
hash: md5
md5: 50921e6a32b8d391ccfd01d55cbd23be
size: 94