rain1024 committed on
Commit
513f013
·
1 Parent(s): f2c46df

Add trained models (POS tagger and word segmentation)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .gitignore +1 -1
  3. models/pos_tagger/20260131_000000/metadata.yaml +44 -0
  4. models/pos_tagger/20260131_000000/model.crfsuite +3 -0
  5. models/pos_tagger/baseline-pos-10iter/metadata.yaml +26 -0
  6. models/pos_tagger/baseline-pos-10iter/model.crfsuite +3 -0
  7. models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml +26 -0
  8. models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite +3 -0
  9. models/pos_tagger/baseline-underthesea-10iter/metadata.yaml +26 -0
  10. models/pos_tagger/baseline-underthesea-10iter/model.crf +3 -0
  11. models/pos_tagger/crfsuite-rs/metadata.yaml +26 -0
  12. models/pos_tagger/crfsuite-rs/model.crfsuite +3 -0
  13. models/pos_tagger/fast-exp-10iter/metadata.yaml +26 -0
  14. models/pos_tagger/fast-exp-10iter/model.crf +3 -0
  15. models/pos_tagger/final-baseline-10iter/metadata.yaml +26 -0
  16. models/pos_tagger/final-baseline-10iter/model.crf +3 -0
  17. models/pos_tagger/optimized-pos/metadata.yaml +26 -0
  18. models/pos_tagger/optimized-pos/model.crf +3 -0
  19. models/pos_tagger/parallel-10iter/metadata.yaml +26 -0
  20. models/pos_tagger/parallel-10iter/model.crf +3 -0
  21. models/pos_tagger/python-crfsuite-v1/metadata.yaml +26 -0
  22. models/pos_tagger/python-crfsuite-v1/model.crfsuite +3 -0
  23. models/pos_tagger/python-crfsuite/metadata.yaml +26 -0
  24. models/pos_tagger/python-crfsuite/model.crfsuite +3 -0
  25. models/pos_tagger/simd-avx2-10iter/metadata.yaml +26 -0
  26. models/pos_tagger/simd-avx2-10iter/model.crf +3 -0
  27. models/pos_tagger/simd-v1/metadata.yaml +26 -0
  28. models/pos_tagger/simd-v1/model.crf +3 -0
  29. models/pos_tagger/simd-v2/metadata.yaml +26 -0
  30. models/pos_tagger/simd-v2/model.crf +3 -0
  31. models/pos_tagger/test-200iter-crfsuite/metadata.yaml +26 -0
  32. models/pos_tagger/test-200iter-crfsuite/model.crfsuite +3 -0
  33. models/pos_tagger/test-200iter/metadata.yaml +26 -0
  34. models/pos_tagger/test-200iter/model.crf +3 -0
  35. models/pos_tagger/test-crfsuite-style/metadata.yaml +26 -0
  36. models/pos_tagger/test-crfsuite-style/model.crf +3 -0
  37. models/pos_tagger/test-speed/metadata.yaml +26 -0
  38. models/pos_tagger/test-speed/model.crf +3 -0
  39. models/pos_tagger/underthesea-core-optimized/metadata.yaml +26 -0
  40. models/pos_tagger/underthesea-core-optimized/model.crf +3 -0
  41. models/pos_tagger/underthesea-core-v2/metadata.yaml +26 -0
  42. models/pos_tagger/underthesea-core-v2/model.crf +3 -0
  43. models/pos_tagger/underthesea-core-v3/metadata.yaml +26 -0
  44. models/pos_tagger/underthesea-core-v3/model.crf +3 -0
  45. models/pos_tagger/underthesea-core-v4/metadata.yaml +26 -0
  46. models/pos_tagger/underthesea-core-v4/model.crf +3 -0
  47. models/word_segmentation/20260131_000000/metadata.yaml +36 -0
  48. models/word_segmentation/20260131_000000/model.crfsuite +3 -0
  49. models/word_segmentation/20260131_041701/examples.output +2 -0
  50. models/word_segmentation/20260131_041701/metadata.yaml +34 -0
.gitattributes CHANGED
@@ -42,5 +42,6 @@ references/2021.naacl.nguyen/paper.pdf filter=lfs diff=lfs merge=lfs -text
42
  references/2021.naacl.nguyen/source/JointModel.pdf filter=lfs diff=lfs merge=lfs -text
43
  *.pdf filter=lfs diff=lfs merge=lfs -text
44
  *.crfsuite filter=lfs diff=lfs merge=lfs -text
 
45
  *.mco filter=lfs diff=lfs merge=lfs -text
46
  *.jar filter=lfs diff=lfs merge=lfs -text
 
42
  references/2021.naacl.nguyen/source/JointModel.pdf filter=lfs diff=lfs merge=lfs -text
43
  *.pdf filter=lfs diff=lfs merge=lfs -text
44
  *.crfsuite filter=lfs diff=lfs merge=lfs -text
45
+ *.crf filter=lfs diff=lfs merge=lfs -text
46
  *.mco filter=lfs diff=lfs merge=lfs -text
47
  *.jar filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -26,10 +26,10 @@ per_tag_metrics.png
26
  # Temporary model files (main model is tracked via Git LFS)
27
  *.crfsuite
28
  !pos_tagger.crfsuite
 
29
 
30
  # Logs
31
  *.log
32
  wandb/
33
 
34
- models.claude
35
  .claude
 
26
  # Temporary model files (main model is tracked via Git LFS)
27
  *.crfsuite
28
  !pos_tagger.crfsuite
29
+ !models/**/*.crfsuite
30
 
31
  # Logs
32
  *.log
33
  wandb/
34
 
 
35
  .claude
models/pos_tagger/20260131_000000/metadata.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # POS Tagger Model Metadata
2
+ # Auto-generated during training
3
+
4
+ model:
5
+ name: Vietnamese POS Tagger
6
+ version: "20260131_000000"
7
+ type: CRF (Conditional Random Field)
8
+ framework: python-crfsuite
9
+
10
+ training:
11
+ dataset: undertheseanlp/UDD-1
12
+ train_sentences: 18282
13
+ val_sentences: 859
14
+ test_sentences: 859
15
+ hyperparameters:
16
+ c1: 1.0
17
+ c2: 0.001
18
+ max_iterations: 100
19
+
20
+ performance:
21
+ test_accuracy: 0.9282
22
+ tags:
23
+ - ADJ
24
+ - ADP
25
+ - ADV
26
+ - AUX
27
+ - CCONJ
28
+ - DET
29
+ - NOUN
30
+ - NUM
31
+ - PART
32
+ - PRON
33
+ - PROPN
34
+ - PUNCT
35
+ - SCONJ
36
+ - VERB
37
+ - X
38
+
39
+ files:
40
+ model: model.crfsuite
41
+ config: ../../../configs/pos_tagger.yaml
42
+
43
+ created_at: "2026-01-31"
44
+ author: undertheseanlp
models/pos_tagger/20260131_000000/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
3
+ size 2366076
models/pos_tagger/baseline-pos-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: baseline-pos-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 68.24
16
+ performance:
17
+ test_accuracy: 0.7837
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:36:03'
26
+ author: undertheseanlp
models/pos_tagger/baseline-pos-10iter/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
3
+ size 14812304
models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: baseline-python-crfsuite-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 67.69
16
+ performance:
17
+ test_accuracy: 0.7837
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:56:21'
26
+ author: undertheseanlp
models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
3
+ size 14812304
models/pos_tagger/baseline-underthesea-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: baseline-underthesea-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 59.02
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:55:02'
26
+ author: undertheseanlp
models/pos_tagger/baseline-underthesea-10iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f0d10abcc1d2bc4695a0406e924c3a66b2517b16dd214d4d57b2b5391b5980
3
+ size 29270222
models/pos_tagger/crfsuite-rs/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: crfsuite-rs
4
+ type: CRF (Conditional Random Field)
5
+ framework: crfsuite-rs
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 100
15
+ duration_seconds: 165.75
16
+ performance:
17
+ test_accuracy: 0.9589
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 07:32:56'
26
+ author: undertheseanlp
models/pos_tagger/crfsuite-rs/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
3
+ size 2366076
models/pos_tagger/fast-exp-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: fast-exp-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 57.44
16
+ performance:
17
+ test_accuracy: 0.7553
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:10:30'
26
+ author: undertheseanlp
models/pos_tagger/fast-exp-10iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ef0e55cc1b76bfde88f4abe7c6689968b10ce754cb8e96d0da3655a9bf7f33
3
+ size 29349246
models/pos_tagger/final-baseline-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: final-baseline-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 59.49
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:15:11'
26
+ author: undertheseanlp
models/pos_tagger/final-baseline-10iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b729820a3972fd5806c25ea70ea287ac4108e9bd2ca6e30b5c86b24718c215fc
3
+ size 29270222
models/pos_tagger/optimized-pos/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: optimized-pos
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 349.17
16
+ performance:
17
+ test_accuracy: 0.9598
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:23:20'
26
+ author: undertheseanlp
models/pos_tagger/optimized-pos/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712a51a7b354ad1eb9a0a3b62a33163c935da437a1bd53b3c7c00f7aa84a3f05
3
+ size 25482030
models/pos_tagger/parallel-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: parallel-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 61.77
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:19:29'
26
+ author: undertheseanlp
models/pos_tagger/parallel-10iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2e3b99da852cfa438e3914ac4aab20f00e100709f4cecd325ccf2af60c57b8
3
+ size 29270222
models/pos_tagger/python-crfsuite-v1/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: python-crfsuite-v1
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 290.43
16
+ performance:
17
+ test_accuracy: 0.9598
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 11:00:59'
26
+ author: undertheseanlp
models/pos_tagger/python-crfsuite-v1/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
3
+ size 2139164
models/pos_tagger/python-crfsuite/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: python-crfsuite
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 100
15
+ duration_seconds: 148.29
16
+ performance:
17
+ test_accuracy: 0.9589
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 07:30:01'
26
+ author: undertheseanlp
models/pos_tagger/python-crfsuite/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
3
+ size 2366076
models/pos_tagger/simd-avx2-10iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: simd-avx2-10iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 65.04
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:04:12'
26
+ author: undertheseanlp
models/pos_tagger/simd-avx2-10iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731cfa98e9005c7efb6ba5f58aedef6118680f9bb2e901a7517c68ad4eeb41e0
3
+ size 29270222
models/pos_tagger/simd-v1/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: simd-v1
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 56.64
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:33:20'
26
+ author: undertheseanlp
models/pos_tagger/simd-v1/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418923286348fc36a7a10d1c3b14a012de5a42590049e9c8e69ecdd128ffcf8a
3
+ size 29270222
models/pos_tagger/simd-v2/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: simd-v2
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 10
15
+ duration_seconds: 59.18
16
+ performance:
17
+ test_accuracy: 0.7542
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 15:34:37'
26
+ author: undertheseanlp
models/pos_tagger/simd-v2/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d11d51e4b8e635204e8331cf2929102a211c7bbc44aac86f5f3e0407df6b6eb5
3
+ size 29270222
models/pos_tagger/test-200iter-crfsuite/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: test-200iter-crfsuite
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 304.92
16
+ performance:
17
+ test_accuracy: 0.9598
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crfsuite
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:31:06'
26
+ author: undertheseanlp
models/pos_tagger/test-200iter-crfsuite/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
3
+ size 2139164
models/pos_tagger/test-200iter/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: test-200iter
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 294.05
16
+ performance:
17
+ test_accuracy: 0.9597
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 16:25:50'
26
+ author: undertheseanlp
models/pos_tagger/test-200iter/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5958fc70df813ace61fb161c290f72af5924f92da40ad4df5812f5cb631820da
3
+ size 25482814
models/pos_tagger/test-crfsuite-style/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: test-crfsuite-style
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 100
15
+ duration_seconds: 461.7
16
+ performance:
17
+ test_accuracy: 0.9362
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 08:45:28'
26
+ author: undertheseanlp
models/pos_tagger/test-crfsuite-style/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:803bd523eaf33c31cd1ce232d38c9d480f80311b5df296a26bac04326b256e11
3
+ size 27880622
models/pos_tagger/test-speed/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: test-speed
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 100
15
+ duration_seconds: 217.03
16
+ performance:
17
+ test_accuracy: 0.9362
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 08:32:29'
26
+ author: undertheseanlp
models/pos_tagger/test-speed/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21e4cdf643144be76574e7e9f3441baacafb6fcf7238adb64eac142feb5f63d
3
+ size 27880622
models/pos_tagger/underthesea-core-optimized/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: underthesea-core-optimized
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 100
15
+ duration_seconds: 194.48
16
+ performance:
17
+ test_accuracy: 0.9362
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 07:52:48'
26
+ author: undertheseanlp
models/pos_tagger/underthesea-core-optimized/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6597733686d5c89ade1ae65a22f593ac1a1880728ff5547598e16efec37beb
3
+ size 27880622
models/pos_tagger/underthesea-core-v2/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: underthesea-core-v2
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 351.01
16
+ performance:
17
+ test_accuracy: 0.9556
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 08:08:10'
26
+ author: undertheseanlp
models/pos_tagger/underthesea-core-v2/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9f6fb7fa8c3390597439726b643aa74ee6581d57bbc003ab4b161cebc2cbeeb
3
+ size 26618990
models/pos_tagger/underthesea-core-v3/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: underthesea-core-v3
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 544.99
16
+ performance:
17
+ test_accuracy: 0.9598
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 10:52:59'
26
+ author: undertheseanlp
models/pos_tagger/underthesea-core-v3/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d9d411cf55d00c58cf83333bd7bb0b66898bd78cc34dea2e6f93271a5f6a56
3
+ size 25482670
models/pos_tagger/underthesea-core-v4/metadata.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese POS Tagger
3
+ version: underthesea-core-v4
4
+ type: CRF (Conditional Random Field)
5
+ framework: underthesea-core
6
+ training:
7
+ dataset: undertheseanlp/UDD-1
8
+ train_sentences: 18282
9
+ val_sentences: 859
10
+ test_sentences: 859
11
+ hyperparameters:
12
+ c1: 1.0
13
+ c2: 0.001
14
+ max_iterations: 200
15
+ duration_seconds: 479.03
16
+ performance:
17
+ test_accuracy: 0.9596
18
+ environment:
19
+ platform: Linux
20
+ cpu_model: AMD EPYC 7713 64-Core Processor
21
+ python_version: 3.12.3
22
+ files:
23
+ model: model.crf
24
+ config: ../../../configs/pos_tagger.yaml
25
+ created_at: '2026-01-31 11:58:34'
26
+ author: undertheseanlp
models/pos_tagger/underthesea-core-v4/model.crf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e17ddb2c63318801c9fb770a53197b7d806fdf0cc57c12fcac771644c9248a2
3
+ size 25482782
models/word_segmentation/20260131_000000/metadata.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Word Segmentation Model Metadata
2
+ # Auto-generated during training
3
+
4
+ model:
5
+ name: Vietnamese Word Segmentation
6
+ version: "20260131_000000"
7
+ type: CRF (Conditional Random Field)
8
+ framework: python-crfsuite
9
+ tagging_scheme: BIO
10
+
11
+ training:
12
+ dataset: undertheseanlp/UDD-1
13
+ train_sentences: 18282
14
+ train_syllables: 563134
15
+ val_sentences: 859
16
+ val_syllables: 27170
17
+ test_sentences: 859
18
+ test_syllables: 26132
19
+ hyperparameters:
20
+ c1: 1.0
21
+ c2: 0.001
22
+ max_iterations: 100
23
+
24
+ performance:
25
+ syllable_accuracy: 0.9890
26
+ syllable_f1: 0.9890
27
+ word_precision: 0.9802
28
+ word_recall: 0.9801
29
+ word_f1: 0.9801
30
+
31
+ files:
32
+ model: model.crfsuite
33
+ config: ../../../configs/word_segmentation.yaml
34
+
35
+ created_at: "2026-01-31"
36
+ author: undertheseanlp
models/word_segmentation/20260131_000000/model.crfsuite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
3
+ size 1093088
models/word_segmentation/20260131_041701/examples.output ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Trên thế_giới , giá_vàng đang được giao_dịch ở mức 5.068 USD / ounce , mất thêm khoảng 280 đồng / USD so với phiên sáng_. Nếu tính trong một phiên , giá_vàng mất tổng_cộng gần 500 USD / ounce ( tương_đương mức giảm khoảng 15 triệu đồng ) ._Đây là mức giảm kỷ_lục trong lịch_sử biến_động của kim_loại quý này .
2
+ Hiện_giá vàng thế_giới quy_đổi theo tỷ_giá Vietcombank ( chưa bao_gồm thuế , phí ) vào_khoảng 160,4 triệu đồng /_lượng , thấp hơn vàng trong nước gần 20 triệu đồng /_lượng .
models/word_segmentation/20260131_041701/metadata.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: Vietnamese Word Segmentation
3
+ version: '20260131_041701'
4
+ type: CRF (Conditional Random Field)
5
+ framework: python-crfsuite
6
+ tagging_scheme: BIO
7
+ training:
8
+ dataset: undertheseanlp/UDD-1
9
+ train_sentences: 18282
10
+ train_syllables: 563134
11
+ val_sentences: 859
12
+ val_syllables: 27170
13
+ test_sentences: 859
14
+ test_syllables: 26132
15
+ hyperparameters:
16
+ c1: 1.0
17
+ c2: 0.001
18
+ max_iterations: 100
19
+ duration_seconds: 103.65
20
+ performance:
21
+ syllable_accuracy: 0.989
22
+ syllable_f1: 0.989
23
+ word_precision: 0.9802
24
+ word_recall: 0.9801
25
+ word_f1: 0.9801
26
+ environment:
27
+ platform: Linux
28
+ cpu_model: AMD EPYC 7713 64-Core Processor
29
+ python_version: 3.12.3
30
+ files:
31
+ model: model.crfsuite
32
+ config: ../../../configs/word_segmentation.yaml
33
+ created_at: '2026-01-31 04:18:45'
34
+ author: undertheseanlp