Add trained models (POS tagger and word segmentation)
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- .gitignore +1 -1
- models/pos_tagger/20260131_000000/metadata.yaml +44 -0
- models/pos_tagger/20260131_000000/model.crfsuite +3 -0
- models/pos_tagger/baseline-pos-10iter/metadata.yaml +26 -0
- models/pos_tagger/baseline-pos-10iter/model.crfsuite +3 -0
- models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml +26 -0
- models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite +3 -0
- models/pos_tagger/baseline-underthesea-10iter/metadata.yaml +26 -0
- models/pos_tagger/baseline-underthesea-10iter/model.crf +3 -0
- models/pos_tagger/crfsuite-rs/metadata.yaml +26 -0
- models/pos_tagger/crfsuite-rs/model.crfsuite +3 -0
- models/pos_tagger/fast-exp-10iter/metadata.yaml +26 -0
- models/pos_tagger/fast-exp-10iter/model.crf +3 -0
- models/pos_tagger/final-baseline-10iter/metadata.yaml +26 -0
- models/pos_tagger/final-baseline-10iter/model.crf +3 -0
- models/pos_tagger/optimized-pos/metadata.yaml +26 -0
- models/pos_tagger/optimized-pos/model.crf +3 -0
- models/pos_tagger/parallel-10iter/metadata.yaml +26 -0
- models/pos_tagger/parallel-10iter/model.crf +3 -0
- models/pos_tagger/python-crfsuite-v1/metadata.yaml +26 -0
- models/pos_tagger/python-crfsuite-v1/model.crfsuite +3 -0
- models/pos_tagger/python-crfsuite/metadata.yaml +26 -0
- models/pos_tagger/python-crfsuite/model.crfsuite +3 -0
- models/pos_tagger/simd-avx2-10iter/metadata.yaml +26 -0
- models/pos_tagger/simd-avx2-10iter/model.crf +3 -0
- models/pos_tagger/simd-v1/metadata.yaml +26 -0
- models/pos_tagger/simd-v1/model.crf +3 -0
- models/pos_tagger/simd-v2/metadata.yaml +26 -0
- models/pos_tagger/simd-v2/model.crf +3 -0
- models/pos_tagger/test-200iter-crfsuite/metadata.yaml +26 -0
- models/pos_tagger/test-200iter-crfsuite/model.crfsuite +3 -0
- models/pos_tagger/test-200iter/metadata.yaml +26 -0
- models/pos_tagger/test-200iter/model.crf +3 -0
- models/pos_tagger/test-crfsuite-style/metadata.yaml +26 -0
- models/pos_tagger/test-crfsuite-style/model.crf +3 -0
- models/pos_tagger/test-speed/metadata.yaml +26 -0
- models/pos_tagger/test-speed/model.crf +3 -0
- models/pos_tagger/underthesea-core-optimized/metadata.yaml +26 -0
- models/pos_tagger/underthesea-core-optimized/model.crf +3 -0
- models/pos_tagger/underthesea-core-v2/metadata.yaml +26 -0
- models/pos_tagger/underthesea-core-v2/model.crf +3 -0
- models/pos_tagger/underthesea-core-v3/metadata.yaml +26 -0
- models/pos_tagger/underthesea-core-v3/model.crf +3 -0
- models/pos_tagger/underthesea-core-v4/metadata.yaml +26 -0
- models/pos_tagger/underthesea-core-v4/model.crf +3 -0
- models/word_segmentation/20260131_000000/metadata.yaml +36 -0
- models/word_segmentation/20260131_000000/model.crfsuite +3 -0
- models/word_segmentation/20260131_041701/examples.output +2 -0
- models/word_segmentation/20260131_041701/metadata.yaml +34 -0
.gitattributes
CHANGED
|
@@ -42,5 +42,6 @@ references/2021.naacl.nguyen/paper.pdf filter=lfs diff=lfs merge=lfs -text
|
|
| 42 |
references/2021.naacl.nguyen/source/JointModel.pdf filter=lfs diff=lfs merge=lfs -text
|
| 43 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
| 44 |
*.crfsuite filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 45 |
*.mco filter=lfs diff=lfs merge=lfs -text
|
| 46 |
*.jar filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 42 |
references/2021.naacl.nguyen/source/JointModel.pdf filter=lfs diff=lfs merge=lfs -text
|
| 43 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
| 44 |
*.crfsuite filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.crf filter=lfs diff=lfs merge=lfs -text
|
| 46 |
*.mco filter=lfs diff=lfs merge=lfs -text
|
| 47 |
*.jar filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -26,10 +26,10 @@ per_tag_metrics.png
|
|
| 26 |
# Temporary model files (main model is tracked via Git LFS)
|
| 27 |
*.crfsuite
|
| 28 |
!pos_tagger.crfsuite
|
|
|
|
| 29 |
|
| 30 |
# Logs
|
| 31 |
*.log
|
| 32 |
wandb/
|
| 33 |
|
| 34 |
-
models.claude
|
| 35 |
.claude
|
|
|
|
| 26 |
# Temporary model files (main model is tracked via Git LFS)
|
| 27 |
*.crfsuite
|
| 28 |
!pos_tagger.crfsuite
|
| 29 |
+
!models/**/*.crfsuite
|
| 30 |
|
| 31 |
# Logs
|
| 32 |
*.log
|
| 33 |
wandb/
|
| 34 |
|
|
|
|
| 35 |
.claude
|
models/pos_tagger/20260131_000000/metadata.yaml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# POS Tagger Model Metadata
|
| 2 |
+
# Auto-generated during training
|
| 3 |
+
|
| 4 |
+
model:
|
| 5 |
+
name: Vietnamese POS Tagger
|
| 6 |
+
version: "20260131_000000"
|
| 7 |
+
type: CRF (Conditional Random Field)
|
| 8 |
+
framework: python-crfsuite
|
| 9 |
+
|
| 10 |
+
training:
|
| 11 |
+
dataset: undertheseanlp/UDD-1
|
| 12 |
+
train_sentences: 18282
|
| 13 |
+
val_sentences: 859
|
| 14 |
+
test_sentences: 859
|
| 15 |
+
hyperparameters:
|
| 16 |
+
c1: 1.0
|
| 17 |
+
c2: 0.001
|
| 18 |
+
max_iterations: 100
|
| 19 |
+
|
| 20 |
+
performance:
|
| 21 |
+
test_accuracy: 0.9282
|
| 22 |
+
tags:
|
| 23 |
+
- ADJ
|
| 24 |
+
- ADP
|
| 25 |
+
- ADV
|
| 26 |
+
- AUX
|
| 27 |
+
- CCONJ
|
| 28 |
+
- DET
|
| 29 |
+
- NOUN
|
| 30 |
+
- NUM
|
| 31 |
+
- PART
|
| 32 |
+
- PRON
|
| 33 |
+
- PROPN
|
| 34 |
+
- PUNCT
|
| 35 |
+
- SCONJ
|
| 36 |
+
- VERB
|
| 37 |
+
- X
|
| 38 |
+
|
| 39 |
+
files:
|
| 40 |
+
model: model.crfsuite
|
| 41 |
+
config: ../../../configs/pos_tagger.yaml
|
| 42 |
+
|
| 43 |
+
created_at: "2026-01-31"
|
| 44 |
+
author: undertheseanlp
|
models/pos_tagger/20260131_000000/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
|
| 3 |
+
size 2366076
|
models/pos_tagger/baseline-pos-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: baseline-pos-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 68.24
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7837
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:36:03'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/baseline-pos-10iter/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
|
| 3 |
+
size 14812304
|
models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: baseline-python-crfsuite-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 67.69
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7837
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:56:21'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
|
| 3 |
+
size 14812304
|
models/pos_tagger/baseline-underthesea-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: baseline-underthesea-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 59.02
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:55:02'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/baseline-underthesea-10iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79f0d10abcc1d2bc4695a0406e924c3a66b2517b16dd214d4d57b2b5391b5980
|
| 3 |
+
size 29270222
|
models/pos_tagger/crfsuite-rs/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: crfsuite-rs
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: crfsuite-rs
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 100
|
| 15 |
+
duration_seconds: 165.75
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9589
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 07:32:56'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/crfsuite-rs/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
|
| 3 |
+
size 2366076
|
models/pos_tagger/fast-exp-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: fast-exp-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 57.44
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7553
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:10:30'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/fast-exp-10iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79ef0e55cc1b76bfde88f4abe7c6689968b10ce754cb8e96d0da3655a9bf7f33
|
| 3 |
+
size 29349246
|
models/pos_tagger/final-baseline-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: final-baseline-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 59.49
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:15:11'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/final-baseline-10iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b729820a3972fd5806c25ea70ea287ac4108e9bd2ca6e30b5c86b24718c215fc
|
| 3 |
+
size 29270222
|
models/pos_tagger/optimized-pos/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: optimized-pos
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 349.17
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9598
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:23:20'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/optimized-pos/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:712a51a7b354ad1eb9a0a3b62a33163c935da437a1bd53b3c7c00f7aa84a3f05
|
| 3 |
+
size 25482030
|
models/pos_tagger/parallel-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: parallel-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 61.77
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:19:29'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/parallel-10iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e2e3b99da852cfa438e3914ac4aab20f00e100709f4cecd325ccf2af60c57b8
|
| 3 |
+
size 29270222
|
models/pos_tagger/python-crfsuite-v1/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: python-crfsuite-v1
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 290.43
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9598
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 11:00:59'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/python-crfsuite-v1/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
|
| 3 |
+
size 2139164
|
models/pos_tagger/python-crfsuite/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: python-crfsuite
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 100
|
| 15 |
+
duration_seconds: 148.29
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9589
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 07:30:01'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/python-crfsuite/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
|
| 3 |
+
size 2366076
|
models/pos_tagger/simd-avx2-10iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: simd-avx2-10iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 65.04
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:04:12'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/simd-avx2-10iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:731cfa98e9005c7efb6ba5f58aedef6118680f9bb2e901a7517c68ad4eeb41e0
|
| 3 |
+
size 29270222
|
models/pos_tagger/simd-v1/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: simd-v1
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 56.64
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:33:20'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/simd-v1/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:418923286348fc36a7a10d1c3b14a012de5a42590049e9c8e69ecdd128ffcf8a
|
| 3 |
+
size 29270222
|
models/pos_tagger/simd-v2/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: simd-v2
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 10
|
| 15 |
+
duration_seconds: 59.18
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.7542
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 15:34:37'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/simd-v2/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d11d51e4b8e635204e8331cf2929102a211c7bbc44aac86f5f3e0407df6b6eb5
|
| 3 |
+
size 29270222
|
models/pos_tagger/test-200iter-crfsuite/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: test-200iter-crfsuite
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 304.92
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9598
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:31:06'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/test-200iter-crfsuite/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
|
| 3 |
+
size 2139164
|
models/pos_tagger/test-200iter/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: test-200iter
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 294.05
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9597
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 16:25:50'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/test-200iter/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5958fc70df813ace61fb161c290f72af5924f92da40ad4df5812f5cb631820da
|
| 3 |
+
size 25482814
|
models/pos_tagger/test-crfsuite-style/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: test-crfsuite-style
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 100
|
| 15 |
+
duration_seconds: 461.7
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9362
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 08:45:28'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/test-crfsuite-style/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:803bd523eaf33c31cd1ce232d38c9d480f80311b5df296a26bac04326b256e11
|
| 3 |
+
size 27880622
|
models/pos_tagger/test-speed/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: test-speed
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 100
|
| 15 |
+
duration_seconds: 217.03
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9362
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 08:32:29'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/test-speed/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f21e4cdf643144be76574e7e9f3441baacafb6fcf7238adb64eac142feb5f63d
|
| 3 |
+
size 27880622
|
models/pos_tagger/underthesea-core-optimized/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: underthesea-core-optimized
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 100
|
| 15 |
+
duration_seconds: 194.48
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9362
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 07:52:48'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/underthesea-core-optimized/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa6597733686d5c89ade1ae65a22f593ac1a1880728ff5547598e16efec37beb
|
| 3 |
+
size 27880622
|
models/pos_tagger/underthesea-core-v2/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: underthesea-core-v2
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 351.01
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9556
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 08:08:10'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/underthesea-core-v2/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9f6fb7fa8c3390597439726b643aa74ee6581d57bbc003ab4b161cebc2cbeeb
|
| 3 |
+
size 26618990
|
models/pos_tagger/underthesea-core-v3/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: underthesea-core-v3
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 544.99
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9598
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crfsuite
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 10:52:59'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/underthesea-core-v3/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4d9d411cf55d00c58cf83333bd7bb0b66898bd78cc34dea2e6f93271a5f6a56
|
| 3 |
+
size 25482670
|
models/pos_tagger/underthesea-core-v4/metadata.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese POS Tagger
|
| 3 |
+
version: underthesea-core-v4
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: underthesea-core
|
| 6 |
+
training:
|
| 7 |
+
dataset: undertheseanlp/UDD-1
|
| 8 |
+
train_sentences: 18282
|
| 9 |
+
val_sentences: 859
|
| 10 |
+
test_sentences: 859
|
| 11 |
+
hyperparameters:
|
| 12 |
+
c1: 1.0
|
| 13 |
+
c2: 0.001
|
| 14 |
+
max_iterations: 200
|
| 15 |
+
duration_seconds: 479.03
|
| 16 |
+
performance:
|
| 17 |
+
test_accuracy: 0.9596
|
| 18 |
+
environment:
|
| 19 |
+
platform: Linux
|
| 20 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 21 |
+
python_version: 3.12.3
|
| 22 |
+
files:
|
| 23 |
+
model: model.crf
|
| 24 |
+
config: ../../../configs/pos_tagger.yaml
|
| 25 |
+
created_at: '2026-01-31 11:58:34'
|
| 26 |
+
author: undertheseanlp
|
models/pos_tagger/underthesea-core-v4/model.crf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e17ddb2c63318801c9fb770a53197b7d806fdf0cc57c12fcac771644c9248a2
|
| 3 |
+
size 25482782
|
models/word_segmentation/20260131_000000/metadata.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Word Segmentation Model Metadata
|
| 2 |
+
# Auto-generated during training
|
| 3 |
+
|
| 4 |
+
model:
|
| 5 |
+
name: Vietnamese Word Segmentation
|
| 6 |
+
version: "20260131_000000"
|
| 7 |
+
type: CRF (Conditional Random Field)
|
| 8 |
+
framework: python-crfsuite
|
| 9 |
+
tagging_scheme: BIO
|
| 10 |
+
|
| 11 |
+
training:
|
| 12 |
+
dataset: undertheseanlp/UDD-1
|
| 13 |
+
train_sentences: 18282
|
| 14 |
+
train_syllables: 563134
|
| 15 |
+
val_sentences: 859
|
| 16 |
+
val_syllables: 27170
|
| 17 |
+
test_sentences: 859
|
| 18 |
+
test_syllables: 26132
|
| 19 |
+
hyperparameters:
|
| 20 |
+
c1: 1.0
|
| 21 |
+
c2: 0.001
|
| 22 |
+
max_iterations: 100
|
| 23 |
+
|
| 24 |
+
performance:
|
| 25 |
+
syllable_accuracy: 0.9890
|
| 26 |
+
syllable_f1: 0.9890
|
| 27 |
+
word_precision: 0.9802
|
| 28 |
+
word_recall: 0.9801
|
| 29 |
+
word_f1: 0.9801
|
| 30 |
+
|
| 31 |
+
files:
|
| 32 |
+
model: model.crfsuite
|
| 33 |
+
config: ../../../configs/word_segmentation.yaml
|
| 34 |
+
|
| 35 |
+
created_at: "2026-01-31"
|
| 36 |
+
author: undertheseanlp
|
models/word_segmentation/20260131_000000/model.crfsuite
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
|
| 3 |
+
size 1093088
|
models/word_segmentation/20260131_041701/examples.output
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Trên thế_giới , giá_vàng đang được giao_dịch ở mức 5.068 USD / ounce , mất thêm khoảng 280 đồng / USD so với phiên sáng_. Nếu tính trong một phiên , giá_vàng mất tổng_cộng gần 500 USD / ounce ( tương_đương mức giảm khoảng 15 triệu đồng ) ._Đây là mức giảm kỷ_lục trong lịch_sử biến_động của kim_loại quý này .
|
| 2 |
+
Hiện_giá vàng thế_giới quy_đổi theo tỷ_giá Vietcombank ( chưa bao_gồm thuế , phí ) vào_khoảng 160,4 triệu đồng /_lượng , thấp hơn vàng trong nước gần 20 triệu đồng /_lượng .
|
models/word_segmentation/20260131_041701/metadata.yaml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: Vietnamese Word Segmentation
|
| 3 |
+
version: '20260131_041701'
|
| 4 |
+
type: CRF (Conditional Random Field)
|
| 5 |
+
framework: python-crfsuite
|
| 6 |
+
tagging_scheme: BIO
|
| 7 |
+
training:
|
| 8 |
+
dataset: undertheseanlp/UDD-1
|
| 9 |
+
train_sentences: 18282
|
| 10 |
+
train_syllables: 563134
|
| 11 |
+
val_sentences: 859
|
| 12 |
+
val_syllables: 27170
|
| 13 |
+
test_sentences: 859
|
| 14 |
+
test_syllables: 26132
|
| 15 |
+
hyperparameters:
|
| 16 |
+
c1: 1.0
|
| 17 |
+
c2: 0.001
|
| 18 |
+
max_iterations: 100
|
| 19 |
+
duration_seconds: 103.65
|
| 20 |
+
performance:
|
| 21 |
+
syllable_accuracy: 0.989
|
| 22 |
+
syllable_f1: 0.989
|
| 23 |
+
word_precision: 0.9802
|
| 24 |
+
word_recall: 0.9801
|
| 25 |
+
word_f1: 0.9801
|
| 26 |
+
environment:
|
| 27 |
+
platform: Linux
|
| 28 |
+
cpu_model: AMD EPYC 7713 64-Core Processor
|
| 29 |
+
python_version: 3.12.3
|
| 30 |
+
files:
|
| 31 |
+
model: model.crfsuite
|
| 32 |
+
config: ../../../configs/word_segmentation.yaml
|
| 33 |
+
created_at: '2026-01-31 04:18:45'
|
| 34 |
+
author: undertheseanlp
|