diff --git a/.gitattributes b/.gitattributes
index 24a49c7add6ae93aa3928cb19b922aa3171f1952..43adae2acb07a23584eda3119f481c7375a4a1a2 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -42,5 +42,6 @@ references/2021.naacl.nguyen/paper.pdf filter=lfs diff=lfs merge=lfs -text
 references/2021.naacl.nguyen/source/JointModel.pdf filter=lfs diff=lfs merge=lfs -text
 *.pdf filter=lfs diff=lfs merge=lfs -text
 *.crfsuite filter=lfs diff=lfs merge=lfs -text
+*.crf filter=lfs diff=lfs merge=lfs -text
 *.mco filter=lfs diff=lfs merge=lfs -text
 *.jar filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index bbf6b040c7684dde394fa1313b5cef0775333682..6d20b04b8cfa9d7924e12b87333e9d37ba759aee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,10 +26,10 @@ per_tag_metrics.png
 # Temporary model files (main model is tracked via Git LFS)
 *.crfsuite
 !pos_tagger.crfsuite
+!models/**/*.crfsuite
 
 # Logs
 *.log
 wandb/
 
-models.claude
 .claude
diff --git a/models/pos_tagger/20260131_000000/metadata.yaml b/models/pos_tagger/20260131_000000/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..154ab2489c5ea7816cb0a8eb8f091ee03d482e1d
--- /dev/null
+++ b/models/pos_tagger/20260131_000000/metadata.yaml
@@ -0,0 +1,44 @@
+# POS Tagger Model Metadata
+# Auto-generated during training
+
+model:
+  name: Vietnamese POS Tagger
+  version: "20260131_000000"
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+
+performance:
+  test_accuracy: 0.9282
+  tags:
+    - ADJ
+    - ADP
+    - ADV
+    - AUX
+    - CCONJ
+    - DET
+    - NOUN
+    - NUM
+    - PART
+    - PRON
+    - PROPN
+    - PUNCT
+    - SCONJ
+    - VERB
+    - X
+
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+
+created_at: "2026-01-31"
+author: undertheseanlp
diff --git a/models/pos_tagger/20260131_000000/model.crfsuite b/models/pos_tagger/20260131_000000/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..0df4ec4a8af56c7c1516e7acc6ca3d7fad8d4dd3
--- /dev/null
+++ b/models/pos_tagger/20260131_000000/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
+size 2366076
diff --git a/models/pos_tagger/baseline-pos-10iter/metadata.yaml b/models/pos_tagger/baseline-pos-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ebaf698b563416465fc1bb2c9996e96b1e9358e
--- /dev/null
+++ b/models/pos_tagger/baseline-pos-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: baseline-pos-10iter
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 68.24
+performance:
+  test_accuracy: 0.7837
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:36:03'
+author: undertheseanlp
diff --git a/models/pos_tagger/baseline-pos-10iter/model.crfsuite b/models/pos_tagger/baseline-pos-10iter/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..dcd1b503e32fa62550f96a4f126844650fec7db6
--- /dev/null
+++ b/models/pos_tagger/baseline-pos-10iter/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
+size 14812304
diff --git a/models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml b/models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab11065b425a36430d11b2cef7447b6021fae296
--- /dev/null
+++ b/models/pos_tagger/baseline-python-crfsuite-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: baseline-python-crfsuite-10iter
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 67.69
+performance:
+  test_accuracy: 0.7837
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:56:21'
+author: undertheseanlp
diff --git a/models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite b/models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..dcd1b503e32fa62550f96a4f126844650fec7db6
--- /dev/null
+++ b/models/pos_tagger/baseline-python-crfsuite-10iter/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b1dcf8240e73a5953180f5da1c2a54aa6ef2dd67775ccd5c86a245489c1520
+size 14812304
diff --git a/models/pos_tagger/baseline-underthesea-10iter/metadata.yaml b/models/pos_tagger/baseline-underthesea-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7efc516e4f563c6286d61285818d6f2b84e6f276
--- /dev/null
+++ b/models/pos_tagger/baseline-underthesea-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: baseline-underthesea-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 59.02
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:55:02'
+author: undertheseanlp
diff --git a/models/pos_tagger/baseline-underthesea-10iter/model.crf b/models/pos_tagger/baseline-underthesea-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..42a3894c488256eaca779dcfc94aba43a2a15058
--- /dev/null
+++ b/models/pos_tagger/baseline-underthesea-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79f0d10abcc1d2bc4695a0406e924c3a66b2517b16dd214d4d57b2b5391b5980
+size 29270222
diff --git a/models/pos_tagger/crfsuite-rs/metadata.yaml b/models/pos_tagger/crfsuite-rs/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa44df4205dfd62fec6d3d8e12edc3425f1fb58c
--- /dev/null
+++ b/models/pos_tagger/crfsuite-rs/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: crfsuite-rs
+  type: CRF (Conditional Random Field)
+  framework: crfsuite-rs
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 165.75
+performance:
+  test_accuracy: 0.9589
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 07:32:56'
+author: undertheseanlp
diff --git a/models/pos_tagger/crfsuite-rs/model.crfsuite b/models/pos_tagger/crfsuite-rs/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..0df4ec4a8af56c7c1516e7acc6ca3d7fad8d4dd3
--- /dev/null
+++ b/models/pos_tagger/crfsuite-rs/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
+size 2366076
diff --git a/models/pos_tagger/fast-exp-10iter/metadata.yaml b/models/pos_tagger/fast-exp-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..96c743a51f7bd98c2410c44f022872e8590efb65
--- /dev/null
+++ b/models/pos_tagger/fast-exp-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: fast-exp-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 57.44
+performance:
+  test_accuracy: 0.7553
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:10:30'
+author: undertheseanlp
diff --git a/models/pos_tagger/fast-exp-10iter/model.crf b/models/pos_tagger/fast-exp-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..cdaf0853b0d9662471aab1b404382b1b267361cd
--- /dev/null
+++ b/models/pos_tagger/fast-exp-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79ef0e55cc1b76bfde88f4abe7c6689968b10ce754cb8e96d0da3655a9bf7f33
+size 29349246
diff --git a/models/pos_tagger/final-baseline-10iter/metadata.yaml b/models/pos_tagger/final-baseline-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..701d63a76291983dd119a0196fdf6bf2136e9d63
--- /dev/null
+++ b/models/pos_tagger/final-baseline-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: final-baseline-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 59.49
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:15:11'
+author: undertheseanlp
diff --git a/models/pos_tagger/final-baseline-10iter/model.crf b/models/pos_tagger/final-baseline-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..1b2043434691fc8960d2103d24d5535903b8d000
--- /dev/null
+++ b/models/pos_tagger/final-baseline-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b729820a3972fd5806c25ea70ea287ac4108e9bd2ca6e30b5c86b24718c215fc
+size 29270222
diff --git a/models/pos_tagger/optimized-pos/metadata.yaml b/models/pos_tagger/optimized-pos/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..665592bb829bb1af98843265389cf0568381f1d7
--- /dev/null
+++ b/models/pos_tagger/optimized-pos/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: optimized-pos
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 349.17
+performance:
+  test_accuracy: 0.9598
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:23:20'
+author: undertheseanlp
diff --git a/models/pos_tagger/optimized-pos/model.crf b/models/pos_tagger/optimized-pos/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..40413ffbabf24bbeb94e3dfd8dc33618d528a574
--- /dev/null
+++ b/models/pos_tagger/optimized-pos/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:712a51a7b354ad1eb9a0a3b62a33163c935da437a1bd53b3c7c00f7aa84a3f05
+size 25482030
diff --git a/models/pos_tagger/parallel-10iter/metadata.yaml b/models/pos_tagger/parallel-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a0a5b7e41b0a3ec0521ea6d68a63929c3bab622
--- /dev/null
+++ b/models/pos_tagger/parallel-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: parallel-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 61.77
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:19:29'
+author: undertheseanlp
diff --git a/models/pos_tagger/parallel-10iter/model.crf b/models/pos_tagger/parallel-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..2f4a3dbe38cb6312ae94bc897e41fe1a2c1e3d50
--- /dev/null
+++ b/models/pos_tagger/parallel-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e2e3b99da852cfa438e3914ac4aab20f00e100709f4cecd325ccf2af60c57b8
+size 29270222
diff --git a/models/pos_tagger/python-crfsuite-v1/metadata.yaml b/models/pos_tagger/python-crfsuite-v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea0f1374fe87e9ec7537b05ba2a04339bef0b17a
--- /dev/null
+++ b/models/pos_tagger/python-crfsuite-v1/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: python-crfsuite-v1
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 290.43
+performance:
+  test_accuracy: 0.9598
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 11:00:59'
+author: undertheseanlp
diff --git a/models/pos_tagger/python-crfsuite-v1/model.crfsuite b/models/pos_tagger/python-crfsuite-v1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..01e28c463107974d807608ff0591f71c264c6b11
--- /dev/null
+++ b/models/pos_tagger/python-crfsuite-v1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
+size 2139164
diff --git a/models/pos_tagger/python-crfsuite/metadata.yaml b/models/pos_tagger/python-crfsuite/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8c6272de391bbe056943aa99e90489ab3eabd97
--- /dev/null
+++ b/models/pos_tagger/python-crfsuite/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: python-crfsuite
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 148.29
+performance:
+  test_accuracy: 0.9589
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 07:30:01'
+author: undertheseanlp
diff --git a/models/pos_tagger/python-crfsuite/model.crfsuite b/models/pos_tagger/python-crfsuite/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..0df4ec4a8af56c7c1516e7acc6ca3d7fad8d4dd3
--- /dev/null
+++ b/models/pos_tagger/python-crfsuite/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27dfbf196829379c69feda056d53482b3cc69a7f134fc5b853b0ba3a0f80f139
+size 2366076
diff --git a/models/pos_tagger/simd-avx2-10iter/metadata.yaml b/models/pos_tagger/simd-avx2-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05883f9a3a8bb03bac4175182bc23d17787819ef
--- /dev/null
+++ b/models/pos_tagger/simd-avx2-10iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: simd-avx2-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 65.04
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:04:12'
+author: undertheseanlp
diff --git a/models/pos_tagger/simd-avx2-10iter/model.crf b/models/pos_tagger/simd-avx2-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..c080f62498ee195d79319a21683486a8b8c754ed
--- /dev/null
+++ b/models/pos_tagger/simd-avx2-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:731cfa98e9005c7efb6ba5f58aedef6118680f9bb2e901a7517c68ad4eeb41e0
+size 29270222
diff --git a/models/pos_tagger/simd-v1/metadata.yaml b/models/pos_tagger/simd-v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..73ff7cd8d6b3e4f16e3d3f528cd7b3a323f81a6e
--- /dev/null
+++ b/models/pos_tagger/simd-v1/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: simd-v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 56.64
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:33:20'
+author: undertheseanlp
diff --git a/models/pos_tagger/simd-v1/model.crf b/models/pos_tagger/simd-v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..f61199c78b17c916018c3a8d3bfeae7af15e1b20
--- /dev/null
+++ b/models/pos_tagger/simd-v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:418923286348fc36a7a10d1c3b14a012de5a42590049e9c8e69ecdd128ffcf8a
+size 29270222
diff --git a/models/pos_tagger/simd-v2/metadata.yaml b/models/pos_tagger/simd-v2/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9737d32165c1e4d5dbca87dcbca75ede7932995d
--- /dev/null
+++ b/models/pos_tagger/simd-v2/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: simd-v2
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 59.18
+performance:
+  test_accuracy: 0.7542
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 15:34:37'
+author: undertheseanlp
diff --git a/models/pos_tagger/simd-v2/model.crf b/models/pos_tagger/simd-v2/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..3bb3cc14171a487ee482e8a18e86d56c1b356fb7
--- /dev/null
+++ b/models/pos_tagger/simd-v2/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d11d51e4b8e635204e8331cf2929102a211c7bbc44aac86f5f3e0407df6b6eb5
+size 29270222
diff --git a/models/pos_tagger/test-200iter-crfsuite/metadata.yaml b/models/pos_tagger/test-200iter-crfsuite/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c760e2b71e0918c6d97f30ef257897db02d3d01
--- /dev/null
+++ b/models/pos_tagger/test-200iter-crfsuite/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: test-200iter-crfsuite
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 304.92
+performance:
+  test_accuracy: 0.9598
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:31:06'
+author: undertheseanlp
diff --git a/models/pos_tagger/test-200iter-crfsuite/model.crfsuite b/models/pos_tagger/test-200iter-crfsuite/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..01e28c463107974d807608ff0591f71c264c6b11
--- /dev/null
+++ b/models/pos_tagger/test-200iter-crfsuite/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aaf38e66198bafeac12b38cb6403656c8e51472e840e84699494b29034632ebe
+size 2139164
diff --git a/models/pos_tagger/test-200iter/metadata.yaml b/models/pos_tagger/test-200iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c428ba6987bf9bf91a2dc143a3cb4592c7697a2
--- /dev/null
+++ b/models/pos_tagger/test-200iter/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: test-200iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 294.05
+performance:
+  test_accuracy: 0.9597
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 16:25:50'
+author: undertheseanlp
diff --git a/models/pos_tagger/test-200iter/model.crf b/models/pos_tagger/test-200iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..2f997132e0e50979519dc202ded08eec7fd4697a
--- /dev/null
+++ b/models/pos_tagger/test-200iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5958fc70df813ace61fb161c290f72af5924f92da40ad4df5812f5cb631820da
+size 25482814
diff --git a/models/pos_tagger/test-crfsuite-style/metadata.yaml b/models/pos_tagger/test-crfsuite-style/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9beec08d4845906f32cf61c7a537c09fc683901f
--- /dev/null
+++ b/models/pos_tagger/test-crfsuite-style/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: test-crfsuite-style
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 461.7
+performance:
+  test_accuracy: 0.9362
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 08:45:28'
+author: undertheseanlp
diff --git a/models/pos_tagger/test-crfsuite-style/model.crf b/models/pos_tagger/test-crfsuite-style/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..956af1a8eead732ba1d6cc0567573a775472c28b
--- /dev/null
+++ b/models/pos_tagger/test-crfsuite-style/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:803bd523eaf33c31cd1ce232d38c9d480f80311b5df296a26bac04326b256e11
+size 27880622
diff --git a/models/pos_tagger/test-speed/metadata.yaml b/models/pos_tagger/test-speed/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78ecde133f65e9ae0846bde2be6341ebf7547287
--- /dev/null
+++ b/models/pos_tagger/test-speed/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: test-speed
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 217.03
+performance:
+  test_accuracy: 0.9362
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 08:32:29'
+author: undertheseanlp
diff --git a/models/pos_tagger/test-speed/model.crf b/models/pos_tagger/test-speed/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..5eee8cd0c1c84a554caccec8da88991ae7e88ea7
--- /dev/null
+++ b/models/pos_tagger/test-speed/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21e4cdf643144be76574e7e9f3441baacafb6fcf7238adb64eac142feb5f63d
+size 27880622
diff --git a/models/pos_tagger/underthesea-core-optimized/metadata.yaml b/models/pos_tagger/underthesea-core-optimized/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f2f8e6fc8b082875c43e03b377ace2adc04c1c3
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-optimized/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: underthesea-core-optimized
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 194.48
+performance:
+  test_accuracy: 0.9362
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 07:52:48'
+author: undertheseanlp
diff --git a/models/pos_tagger/underthesea-core-optimized/model.crf b/models/pos_tagger/underthesea-core-optimized/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..571bc6a4143ca7f4048856fc972dac6d9b755ce1
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-optimized/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6597733686d5c89ade1ae65a22f593ac1a1880728ff5547598e16efec37beb
+size 27880622
diff --git a/models/pos_tagger/underthesea-core-v2/metadata.yaml b/models/pos_tagger/underthesea-core-v2/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29eebb57e4a80ebea58b7b2affff71971032e46c
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v2/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: underthesea-core-v2
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 351.01
+performance:
+  test_accuracy: 0.9556
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 08:08:10'
+author: undertheseanlp
diff --git a/models/pos_tagger/underthesea-core-v2/model.crf b/models/pos_tagger/underthesea-core-v2/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..c62b51f1c000a508459afce4a787aff31f9d9db2
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v2/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f6fb7fa8c3390597439726b643aa74ee6581d57bbc003ab4b161cebc2cbeeb
+size 26618990
diff --git a/models/pos_tagger/underthesea-core-v3/metadata.yaml b/models/pos_tagger/underthesea-core-v3/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6ef125131f302d32cc81768d10b909554c57f2b
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v3/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: underthesea-core-v3
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 544.99
+performance:
+  test_accuracy: 0.9598
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 10:52:59'
+author: undertheseanlp
diff --git a/models/pos_tagger/underthesea-core-v3/model.crf b/models/pos_tagger/underthesea-core-v3/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..e2d0124d9522952e4a3b105573dc7cd92f44a521
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v3/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4d9d411cf55d00c58cf83333bd7bb0b66898bd78cc34dea2e6f93271a5f6a56
+size 25482670
diff --git a/models/pos_tagger/underthesea-core-v4/metadata.yaml b/models/pos_tagger/underthesea-core-v4/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c86ecacf8f4fafad88cae8a7ed76651d1f605d28
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v4/metadata.yaml
@@ -0,0 +1,26 @@
+model:
+  name: Vietnamese POS Tagger
+  version: underthesea-core-v4
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  val_sentences: 859
+  test_sentences: 859
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 479.03
+performance:
+  test_accuracy: 0.9596
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/pos_tagger.yaml
+created_at: '2026-01-31 11:58:34'
+author: undertheseanlp
diff --git a/models/pos_tagger/underthesea-core-v4/model.crf b/models/pos_tagger/underthesea-core-v4/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..f05ec0b78c82f596fd570e5233d75a7ccd0c9fa2
--- /dev/null
+++ b/models/pos_tagger/underthesea-core-v4/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e17ddb2c63318801c9fb770a53197b7d806fdf0cc57c12fcac771644c9248a2
+size 25482782
diff --git a/models/word_segmentation/20260131_000000/metadata.yaml b/models/word_segmentation/20260131_000000/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66ba1a0b6cb9c86919b0ce6ea0cc2d8fc6b9b9cb
--- /dev/null
+++ b/models/word_segmentation/20260131_000000/metadata.yaml
@@ -0,0 +1,36 @@
+# Word Segmentation Model Metadata
+# Auto-generated during training
+
+model:
+  name: Vietnamese Word Segmentation
+  version: "20260131_000000"
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+
+performance:
+  syllable_accuracy: 0.9890
+  syllable_f1: 0.9890
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+
+created_at: "2026-01-31"
+author: undertheseanlp
diff --git a/models/word_segmentation/20260131_000000/model.crfsuite b/models/word_segmentation/20260131_000000/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/20260131_000000/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/20260131_041701/examples.output b/models/word_segmentation/20260131_041701/examples.output
new file mode 100644
index 0000000000000000000000000000000000000000..08c6a1589f032ac4b69e8a51abc920d6363b4f82
--- /dev/null
+++ b/models/word_segmentation/20260131_041701/examples.output
@@ -0,0 +1,2 @@
+Trên thế_giới , giá_vàng đang được giao_dịch ở mức 5.068 USD / ounce , mất thêm khoảng 280 đồng / USD so với phiên sáng_. Nếu tính trong một phiên , giá_vàng mất tổng_cộng gần 500 USD / ounce ( tương_đương mức giảm khoảng 15 triệu đồng ) ._Đây là mức giảm kỷ_lục trong lịch_sử biến_động của kim_loại quý này .
+Hiện_giá vàng thế_giới quy_đổi theo tỷ_giá Vietcombank ( chưa bao_gồm thuế , phí ) vào_khoảng 160,4 triệu đồng /_lượng , thấp hơn vàng trong nước gần 20 triệu đồng /_lượng .
diff --git a/models/word_segmentation/20260131_041701/metadata.yaml b/models/word_segmentation/20260131_041701/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc75c142a3d7ce2c5cc371916f9611e92aaf9d27
--- /dev/null
+++ b/models/word_segmentation/20260131_041701/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: '20260131_041701'
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 103.65
+performance:
+  syllable_accuracy: 0.989
+  syllable_f1: 0.989
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 04:18:45'
+author: undertheseanlp
diff --git a/models/word_segmentation/20260131_041701/model.crfsuite b/models/word_segmentation/20260131_041701/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/20260131_041701/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/20260131_060411/metadata.yaml b/models/word_segmentation/20260131_060411/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ccd69f68fc5b574e45a6f204dd06a3b0c382986
--- /dev/null
+++ b/models/word_segmentation/20260131_060411/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: '20260131_060411'
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 125.06
+performance:
+  syllable_accuracy: 0.989
+  syllable_f1: 0.989
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:06:16'
+author: undertheseanlp
diff --git a/models/word_segmentation/20260131_060411/model.crfsuite b/models/word_segmentation/20260131_060411/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/20260131_060411/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/20260131_061406/metadata.yaml b/models/word_segmentation/20260131_061406/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3ba4703ef49189b7e9b353441a9d3a6f9ad00dd
--- /dev/null
+++ b/models/word_segmentation/20260131_061406/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: '20260131_061406'
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 94.48
+performance:
+  syllable_accuracy: 0.9774
+  syllable_f1: 0.9774
+  word_precision: 0.9582
+  word_recall: 0.9583
+  word_f1: 0.9582
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:15:40'
+author: undertheseanlp
diff --git a/models/word_segmentation/20260131_061406/model.crf b/models/word_segmentation/20260131_061406/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..18a72324e85a86ee5871181c6e1f521cf03c3431
--- /dev/null
+++ b/models/word_segmentation/20260131_061406/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04d718662d750fb2a1f55eabebc06032c0290f248608c689f43505b619e09016
+size 50022442
diff --git a/models/word_segmentation/baseline-10iter-b/metadata.yaml b/models/word_segmentation/baseline-10iter-b/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4957cec1e51be699ae8c29657702e01aa20b2ed1
--- /dev/null
+++ b/models/word_segmentation/baseline-10iter-b/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: baseline-10iter-b
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 45.93
+performance:
+  syllable_accuracy: 0.8982
+  syllable_f1: 0.8989
+  word_precision: 0.8212
+  word_recall: 0.8077
+  word_f1: 0.8144
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:13:06'
+author: undertheseanlp
diff --git a/models/word_segmentation/baseline-10iter-b/model.crfsuite b/models/word_segmentation/baseline-10iter-b/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..5b915a80bb34cd3de1dc1a2b4d0128b26583fa87
--- /dev/null
+++ b/models/word_segmentation/baseline-10iter-b/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f157e3441b629705edbff3f703aa5f814f6165877273ca1888e171570260817d
+size 7347980
diff --git a/models/word_segmentation/baseline-10iter/metadata.yaml b/models/word_segmentation/baseline-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..379c010f6c418a2235b7f87747748f5af6f7b13f
--- /dev/null
+++ b/models/word_segmentation/baseline-10iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: baseline-10iter
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 44.97
+performance:
+  syllable_accuracy: 0.8982
+  syllable_f1: 0.8989
+  word_precision: 0.8212
+  word_recall: 0.8077
+  word_f1: 0.8144
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:12:03'
+author: undertheseanlp
diff --git a/models/word_segmentation/baseline-10iter/model.crfsuite b/models/word_segmentation/baseline-10iter/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..5b915a80bb34cd3de1dc1a2b4d0128b26583fa87
--- /dev/null
+++ b/models/word_segmentation/baseline-10iter/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f157e3441b629705edbff3f703aa5f814f6165877273ca1888e171570260817d
+size 7347980
diff --git a/models/word_segmentation/baseline-python-crfsuite-10iter/metadata.yaml b/models/word_segmentation/baseline-python-crfsuite-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55f3060a72fdb0fde641682e942c6af97857cd79
--- /dev/null
+++ b/models/word_segmentation/baseline-python-crfsuite-10iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: baseline-python-crfsuite-10iter
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 47.28
+performance:
+  syllable_accuracy: 0.8982
+  syllable_f1: 0.8989
+  word_precision: 0.8212
+  word_recall: 0.8077
+  word_f1: 0.8144
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:58:15'
+author: undertheseanlp
diff --git a/models/word_segmentation/baseline-python-crfsuite-10iter/model.crfsuite b/models/word_segmentation/baseline-python-crfsuite-10iter/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..5b915a80bb34cd3de1dc1a2b4d0128b26583fa87
--- /dev/null
+++ b/models/word_segmentation/baseline-python-crfsuite-10iter/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f157e3441b629705edbff3f703aa5f814f6165877273ca1888e171570260817d
+size 7347980
diff --git a/models/word_segmentation/baseline-underthesea-10iter/metadata.yaml b/models/word_segmentation/baseline-underthesea-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cbabd29e578d05685168f31116c1d6bad464ccc8
--- /dev/null
+++ b/models/word_segmentation/baseline-underthesea-10iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: baseline-underthesea-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 38.71
+performance:
+  syllable_accuracy: 0.9019
+  syllable_f1: 0.902
+  word_precision: 0.8292
+  word_recall: 0.827
+  word_f1: 0.8281
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:57:19'
+author: undertheseanlp
diff --git a/models/word_segmentation/baseline-underthesea-10iter/model.crf b/models/word_segmentation/baseline-underthesea-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..b9647c3db47bb104ebbb5d9112321950b2c53089
--- /dev/null
+++ b/models/word_segmentation/baseline-underthesea-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fe63c8bb66dbda5bd4869a87c784c9e70d553ad014e6c28d5f8c476a325bca1
+size 35092842
diff --git a/models/word_segmentation/crfsuiters_c2_1/metadata.yaml b/models/word_segmentation/crfsuiters_c2_1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db42312478a9bfcfae9295b9413d3038a01a09d4
--- /dev/null
+++ b/models/word_segmentation/crfsuiters_c2_1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: crfsuiters_c2_1
+  type: CRF (Conditional Random Field)
+  framework: crfsuite-rs
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 0.0
+    c2: 1.0
+    max_iterations: 100
+  duration_seconds: 118.28
+performance:
+  syllable_accuracy: 0.9848
+  syllable_f1: 0.9848
+  word_precision: 0.9717
+  word_recall: 0.9728
+  word_f1: 0.9723
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 07:12:56'
+author: undertheseanlp
diff --git a/models/word_segmentation/crfsuiters_c2_1/model.crfsuite b/models/word_segmentation/crfsuiters_c2_1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..36044b447ce3decba9cda0ab2cc7f5b738033e0e
--- /dev/null
+++ b/models/word_segmentation/crfsuiters_c2_1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c28c79dd6303b614811a26e328d525035b600d050982c91b6779919d9d2ed0b
+size 44378708
diff --git a/models/word_segmentation/crfsuiters_v1/metadata.yaml b/models/word_segmentation/crfsuiters_v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..669a349bce81d6035fbeb541af09ed1afbcbcae2
--- /dev/null
+++ b/models/word_segmentation/crfsuiters_v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: crfsuiters_v1
+  type: CRF (Conditional Random Field)
+  framework: crfsuite-rs
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 112.83
+performance:
+  syllable_accuracy: 0.989
+  syllable_f1: 0.989
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:39:29'
+author: undertheseanlp
diff --git a/models/word_segmentation/crfsuiters_v1/model.crfsuite b/models/word_segmentation/crfsuiters_v1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/crfsuiters_v1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/fast-exp-10iter/metadata.yaml b/models/word_segmentation/fast-exp-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d43442763a5cac17b127ef2600d028337f082598
--- /dev/null
+++ b/models/word_segmentation/fast-exp-10iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: fast-exp-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 38.08
+performance:
+  syllable_accuracy: 0.9012
+  syllable_f1: 0.9024
+  word_precision: 0.837
+  word_recall: 0.8129
+  word_f1: 0.8248
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 16:11:22'
+author: undertheseanlp
diff --git a/models/word_segmentation/fast-exp-10iter/model.crf b/models/word_segmentation/fast-exp-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..556fb85ac297940c169db7841ad33a25563e000c
--- /dev/null
+++ b/models/word_segmentation/fast-exp-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03822bb5d4446235e55201c93d2f924f61fa0ff072c3e74e08f532875764e733
+size 35114410
diff --git a/models/word_segmentation/optimized-200iter/metadata.yaml b/models/word_segmentation/optimized-200iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e74b91792c5a9500cc8d23ee7641819e74634b5
--- /dev/null
+++ b/models/word_segmentation/optimized-200iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: optimized-200iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 133.05
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.98
+  word_recall: 0.9799
+  word_f1: 0.9799
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:15:39'
+author: undertheseanlp
diff --git a/models/word_segmentation/optimized-200iter/model.crf b/models/word_segmentation/optimized-200iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..3a1531eed2910318b7d759875942bf4906280e87
--- /dev/null
+++ b/models/word_segmentation/optimized-200iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53a0dd23de6e4ee6a33d29e1ed6db03064350e4af71207932dab6d4feea1259b
+size 33274810
diff --git a/models/word_segmentation/optimized-v1/metadata.yaml b/models/word_segmentation/optimized-v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..854a162d43cbca666e6f71cd62b22f6c1d74d590
--- /dev/null
+++ b/models/word_segmentation/optimized-v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: optimized-v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 36.55
+performance:
+  syllable_accuracy: 0.9019
+  syllable_f1: 0.902
+  word_precision: 0.8292
+  word_recall: 0.827
+  word_f1: 0.8281
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 15:10:57'
+author: undertheseanlp
diff --git a/models/word_segmentation/optimized-v1/model.crf b/models/word_segmentation/optimized-v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..932a06220adbcf211d4e3ec7eb746143c71d5760
--- /dev/null
+++ b/models/word_segmentation/optimized-v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4f2f7440bc1755d5f0c46cca8693bc112e3e1aac3134bb033aa1de339a63c62
+size 35092842
diff --git a/models/word_segmentation/pycrfsuite_c2_1/metadata.yaml b/models/word_segmentation/pycrfsuite_c2_1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e19bcc8e057d0d1bb2a2ffb90035ce392b8a24d6
--- /dev/null
+++ b/models/word_segmentation/pycrfsuite_c2_1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: pycrfsuite_c2_1
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 0.0
+    c2: 1.0
+    max_iterations: 100
+  duration_seconds: 103.43
+performance:
+  syllable_accuracy: 0.9848
+  syllable_f1: 0.9848
+  word_precision: 0.9717
+  word_recall: 0.9728
+  word_f1: 0.9723
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 07:06:24'
+author: undertheseanlp
diff --git a/models/word_segmentation/pycrfsuite_c2_1/model.crfsuite b/models/word_segmentation/pycrfsuite_c2_1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..36044b447ce3decba9cda0ab2cc7f5b738033e0e
--- /dev/null
+++ b/models/word_segmentation/pycrfsuite_c2_1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c28c79dd6303b614811a26e328d525035b600d050982c91b6779919d9d2ed0b
+size 44378708
diff --git a/models/word_segmentation/pycrfsuite_v1/metadata.yaml b/models/word_segmentation/pycrfsuite_v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab2c0ea5b6d71dfe2b8c8a230e2f46d7b3351dd5
--- /dev/null
+++ b/models/word_segmentation/pycrfsuite_v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: pycrfsuite_v1
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 94.15
+performance:
+  syllable_accuracy: 0.989
+  syllable_f1: 0.989
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:37:31'
+author: undertheseanlp
diff --git a/models/word_segmentation/pycrfsuite_v1/model.crfsuite b/models/word_segmentation/pycrfsuite_v1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/pycrfsuite_v1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/python-crfsuite-20260131/metadata.yaml b/models/word_segmentation/python-crfsuite-20260131/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a156054d8ca4ef7e748b034a4c9880004a49a212
--- /dev/null
+++ b/models/word_segmentation/python-crfsuite-20260131/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: python-crfsuite-20260131
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 96.69
+performance:
+  syllable_accuracy: 0.989
+  syllable_f1: 0.989
+  word_precision: 0.9802
+  word_recall: 0.9801
+  word_f1: 0.9801
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:31:20'
+author: undertheseanlp
diff --git a/models/word_segmentation/python-crfsuite-20260131/model.crfsuite b/models/word_segmentation/python-crfsuite-20260131/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..c00379d70fed822c7a2f73bd18914148d855d6ce
--- /dev/null
+++ b/models/word_segmentation/python-crfsuite-20260131/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56dc5e49912bf944679695507f22876861da892faf627ce6ea26a249bc82c8d4
+size 1093088
diff --git a/models/word_segmentation/python-crfsuite-v1/metadata.yaml b/models/word_segmentation/python-crfsuite-v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2e40a58a92fb3ec3c58429b3b5e7a563563a05b
--- /dev/null
+++ b/models/word_segmentation/python-crfsuite-v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: python-crfsuite-v1
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 154.39
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.98
+  word_recall: 0.9799
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 10:58:43'
+author: undertheseanlp
diff --git a/models/word_segmentation/python-crfsuite-v1/model.crfsuite b/models/word_segmentation/python-crfsuite-v1/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..7bb287a1e691c1880f139a6353e1f00cb80f0874
--- /dev/null
+++ b/models/word_segmentation/python-crfsuite-v1/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:686c105bfeadadb2240cabc9df1a9d8f278beb3e7bb0db39562a21601b4bf1fc
+size 996768
diff --git a/models/word_segmentation/simd-avx2-10iter/metadata.yaml b/models/word_segmentation/simd-avx2-10iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..000018c0a83f7f023e773851202121cf53c786bf
--- /dev/null
+++ b/models/word_segmentation/simd-avx2-10iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: simd-avx2-10iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 10
+  duration_seconds: 38.67
+performance:
+  syllable_accuracy: 0.9019
+  syllable_f1: 0.902
+  word_precision: 0.8292
+  word_recall: 0.827
+  word_f1: 0.8281
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 16:05:04'
+author: undertheseanlp
diff --git a/models/word_segmentation/simd-avx2-10iter/model.crf b/models/word_segmentation/simd-avx2-10iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..0372bb3f8a88875560fa9f4c7538a8b7f344854c
--- /dev/null
+++ b/models/word_segmentation/simd-avx2-10iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:233ed97d7517011f4b730bac446c0cee61dd5857c28ee5eadc993b57f69b573f
+size 35092842
diff --git a/models/word_segmentation/test-200iter-crfsuite/metadata.yaml b/models/word_segmentation/test-200iter-crfsuite/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7e0210c90aa401268a8b70e2f10b5a115c01ea6
--- /dev/null
+++ b/models/word_segmentation/test-200iter-crfsuite/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: test-200iter-crfsuite
+  type: CRF (Conditional Random Field)
+  framework: python-crfsuite
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 162.61
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.98
+  word_recall: 0.9799
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crfsuite
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 16:36:22'
+author: undertheseanlp
diff --git a/models/word_segmentation/test-200iter-crfsuite/model.crfsuite b/models/word_segmentation/test-200iter-crfsuite/model.crfsuite
new file mode 100644
index 0000000000000000000000000000000000000000..7bb287a1e691c1880f139a6353e1f00cb80f0874
--- /dev/null
+++ b/models/word_segmentation/test-200iter-crfsuite/model.crfsuite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:686c105bfeadadb2240cabc9df1a9d8f278beb3e7bb0db39562a21601b4bf1fc
+size 996768
diff --git a/models/word_segmentation/test-200iter/metadata.yaml b/models/word_segmentation/test-200iter/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45aaf9ce61117671ee8ca0ad77512eb66316e007
--- /dev/null
+++ b/models/word_segmentation/test-200iter/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: test-200iter
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 123.32
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.98
+  word_recall: 0.9799
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 16:33:28'
+author: undertheseanlp
diff --git a/models/word_segmentation/test-200iter/model.crf b/models/word_segmentation/test-200iter/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..a8bc1deaa2b8a63e5dfdd9f15ee7d7289f0fd128
--- /dev/null
+++ b/models/word_segmentation/test-200iter/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50a23e213812076a30e9b4b90920a8a27e6a79de7d811893f72d9a8968cffbe2
+size 33275866
diff --git a/models/word_segmentation/underthesea-core-v1/metadata.yaml b/models/word_segmentation/underthesea-core-v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7482bce166254a6b3f859b9012570179fa72910b
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea-core-v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 417.86
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.9801
+  word_recall: 0.9798
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 10:50:51'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea-core-v1/model.crf b/models/word_segmentation/underthesea-core-v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..0ab6f4f5b2f5952dd6db991a5437cb55bff295cf
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053d6d3f916209f7e92c7a91abf351436ee9cfe03df6a85c81ca53573b6e3b51
+size 33275498
diff --git a/models/word_segmentation/underthesea-core-v4/metadata.yaml b/models/word_segmentation/underthesea-core-v4/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81bd0e87e4666611a02c7d5ba4679700a232cd99
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v4/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea-core-v4
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 1072.43
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.9801
+  word_recall: 0.9799
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 12:16:53'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea-core-v4/model.crf b/models/word_segmentation/underthesea-core-v4/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..0bdbda79f3adef64196dc73c1a812a7f9ee07876
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v4/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:152679c58fa1487c5bc337f5d09d05dfed49e5e856d87d8828ef8060e515dde0
+size 33275834
diff --git a/models/word_segmentation/underthesea-core-v5/metadata.yaml b/models/word_segmentation/underthesea-core-v5/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e69f159de03c44d80ac7ab47604fd14307b3099
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v5/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea-core-v5
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 2112.2
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.9801
+  word_recall: 0.9798
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 13:09:50'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea-core-v5/model.crf b/models/word_segmentation/underthesea-core-v5/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..4eccddd8f8346f15b1b881dc5c1b87b97b59a472
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v5/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:329d7e95364871cd6dd59e6a3c778ca4c2a142e1b314aee28bb4f2e5b056dcc2
+size 33275466
diff --git a/models/word_segmentation/underthesea-core-v6/metadata.yaml b/models/word_segmentation/underthesea-core-v6/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78e26f76cbecacb2e0ebc42316dcd5f7a9a67baa
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v6/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea-core-v6
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 200
+  duration_seconds: 1138.37
+performance:
+  syllable_accuracy: 0.9889
+  syllable_f1: 0.9889
+  word_precision: 0.9801
+  word_recall: 0.98
+  word_f1: 0.98
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 13:55:36'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea-core-v6/model.crf b/models/word_segmentation/underthesea-core-v6/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..4f5fba4afde5e155041447f6649905ab73111c8c
--- /dev/null
+++ b/models/word_segmentation/underthesea-core-v6/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf9c08703406aca14b5a600229f1011e98fce48988ae51c2f263c017910b516
+size 33275466
diff --git a/models/word_segmentation/underthesea_c2_1/metadata.yaml b/models/word_segmentation/underthesea_c2_1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28374c1ff8b5c3a3e87049f38d9b752964874e1f
--- /dev/null
+++ b/models/word_segmentation/underthesea_c2_1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea_c2_1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 0.0
+    c2: 1.0
+    max_iterations: 100
+  duration_seconds: 84.72
+performance:
+  syllable_accuracy: 0.9865
+  syllable_f1: 0.9865
+  word_precision: 0.9748
+  word_recall: 0.9754
+  word_f1: 0.9751
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 07:04:29'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea_c2_1/model.crf b/models/word_segmentation/underthesea_c2_1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..06c00a1770415c2b815971ddda46cb7a8804bb7f
--- /dev/null
+++ b/models/word_segmentation/underthesea_c2_1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48138ffefe550e4ac8f89094172264b6bf03d989725288a235495714d375294c
+size 50022442
diff --git a/models/word_segmentation/underthesea_crfsuite_v1/metadata.yaml b/models/word_segmentation/underthesea_crfsuite_v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..235aebd64b17b3d44cbeff1e2cf0b22f7bb996b5
--- /dev/null
+++ b/models/word_segmentation/underthesea_crfsuite_v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea_crfsuite_v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 80.17
+performance:
+  syllable_accuracy: 0.9837
+  syllable_f1: 0.9836
+  word_precision: 0.9688
+  word_recall: 0.9705
+  word_f1: 0.9697
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 07:02:48'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea_crfsuite_v1/model.crf b/models/word_segmentation/underthesea_crfsuite_v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..533cbab58e02f2d8d329b88cbbd4c95eff39355c
--- /dev/null
+++ b/models/word_segmentation/underthesea_crfsuite_v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad1b3d022d6ad04cafe0d3b90ef47e883572fa7529642e5d7dbbd33161cbe53
+size 33931370
diff --git a/models/word_segmentation/underthesea_owlqn_v1/metadata.yaml b/models/word_segmentation/underthesea_owlqn_v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2de2c74528b992d626a99e50d2d553658364c508
--- /dev/null
+++ b/models/word_segmentation/underthesea_owlqn_v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea_owlqn_v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 83.74
+performance:
+  syllable_accuracy: 0.9837
+  syllable_f1: 0.9837
+  word_precision: 0.9691
+  word_recall: 0.9703
+  word_f1: 0.9697
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:56:51'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea_owlqn_v1/model.crf b/models/word_segmentation/underthesea_owlqn_v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..2e0dde816c091d86bffee10fcad6b2849f1e93cd
--- /dev/null
+++ b/models/word_segmentation/underthesea_owlqn_v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5264d06a45eb476ba3ce751b65dbd2a4b05f38ea2855f381a1f1a2afdd2d7ad6
+size 33901114
diff --git a/models/word_segmentation/underthesea_v1/metadata.yaml b/models/word_segmentation/underthesea_v1/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a0dad28ee971a2ff24681301e3c3386f1e2bfb6
--- /dev/null
+++ b/models/word_segmentation/underthesea_v1/metadata.yaml
@@ -0,0 +1,34 @@
+model:
+  name: Vietnamese Word Segmentation
+  version: underthesea_v1
+  type: CRF (Conditional Random Field)
+  framework: underthesea-core
+  tagging_scheme: BIO
+training:
+  dataset: undertheseanlp/UDD-1
+  train_sentences: 18282
+  train_syllables: 563134
+  val_sentences: 859
+  val_syllables: 27170
+  test_sentences: 859
+  test_syllables: 26132
+  hyperparameters:
+    c1: 1.0
+    c2: 0.001
+    max_iterations: 100
+  duration_seconds: 88.92
+performance:
+  syllable_accuracy: 0.9774
+  syllable_f1: 0.9774
+  word_precision: 0.9582
+  word_recall: 0.9583
+  word_f1: 0.9582
+environment:
+  platform: Linux
+  cpu_model: AMD EPYC 7713 64-Core Processor
+  python_version: 3.12.3
+files:
+  model: model.crf
+  config: ../../../configs/word_segmentation.yaml
+created_at: '2026-01-31 06:42:00'
+author: undertheseanlp
diff --git a/models/word_segmentation/underthesea_v1/model.crf b/models/word_segmentation/underthesea_v1/model.crf
new file mode 100644
index 0000000000000000000000000000000000000000..596023d23b84537b2b7182a9fcbc9206791f91cf
--- /dev/null
+++ b/models/word_segmentation/underthesea_v1/model.crf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a9538597dddf47c945580fc641025fdbfc4270d13c4c3070a1ae3a7f447572a
+size 50022442