---
# Metadata record for a trained CRF word-segmentation model.
# NOTE(review): the nesting below was reconstructed from the key names because
# the original indentation was lost (each line was wrapped in table pipes).
# Confirm the placement of `duration_seconds`, `created_at` and `author`
# against whatever tool generates or consumes this file.

# Identity of the model and the toolkit it was built with.
model:
  name: Vietnamese Word Segmentation
  version: python-crfsuite-v1
  type: CRF (Conditional Random Field)
  framework: python-crfsuite
  tagging_scheme: BIO

# Dataset split sizes and the settings used for the training run.
training:
  dataset: undertheseanlp/UDD-1
  train_sentences: 18282
  train_syllables: 563134
  val_sentences: 859
  val_syllables: 27170
  test_sentences: 859
  test_syllables: 26132
  # presumably CRFsuite's L1 (c1) / L2 (c2) regularization coefficients and
  # the optimizer iteration cap — verify against the training script.
  hyperparameters:
    c1: 1.0
    c2: 0.001
    max_iterations: 200
  duration_seconds: 154.39

# Evaluation scores, reported at syllable level and at word level.
# NOTE(review): which split these were measured on is not stated here.
performance:
  syllable_accuracy: 0.9889
  syllable_f1: 0.9889
  word_precision: 0.98
  word_recall: 0.9799
  word_f1: 0.98

# Machine and interpreter recorded for the run.
environment:
  platform: Linux
  cpu_model: AMD EPYC 7713 64-Core Processor
  # Quoted so the version always stays a string, whatever the loader.
  python_version: '3.12.3'

# Artifacts belonging to this run; `config` is a relative path
# (presumably relative to this file's directory — confirm).
files:
  model: model.crfsuite
  config: ../../../configs/word_segmentation.yaml

# Quoted so loaders keep the timestamp as a plain string.
created_at: '2026-01-31 10:58:43'
author: undertheseanlp