---
# Metadata record for a trained word-segmentation model: model identity,
# training data sizes and hyperparameters, evaluation scores, the training
# environment, and the associated files.
#
# NOTE(review): this file was reconstructed into block style from a
# single-line, whitespace-collapsed form that is not parseable YAML.
# The nesting below is inferred from key order -- confirm against the
# tool that produces/consumes this file.

model:
  name: Vietnamese Word Segmentation
  version: python-crfsuite-v1
  type: CRF (Conditional Random Field)
  framework: python-crfsuite
  tagging_scheme: BIO

training:
  dataset: undertheseanlp/UDD-1
  # Split sizes, counted in sentences and in syllables.
  train_sentences: 18282
  train_syllables: 563134
  val_sentences: 859
  val_syllables: 27170
  test_sentences: 859
  test_syllables: 26132
  hyperparameters:
    # Presumably the CRFsuite L1 (c1) and L2 (c2) regularization
    # coefficients -- TODO confirm against the training script.
    c1: 1.0
    c2: 0.001
    max_iterations: 200
  # NOTE(review): placed under `training` (not `hyperparameters`) because it
  # reads as a measured result rather than a setting -- confirm.
  duration_seconds: 154.39

performance:
  syllable_accuracy: 0.9889
  syllable_f1: 0.9889
  word_precision: 0.98
  word_recall: 0.9799
  word_f1: 0.98

environment:
  platform: Linux
  cpu_model: AMD EPYC 7713 64-Core Processor
  # Quoted so a version string can never be retyped as a number.
  python_version: '3.12.3'

files:
  model: model.crfsuite
  config: ../../../configs/word_segmentation.yaml

# NOTE(review): `created_at` and `author` are assumed to be top-level keys
# rather than members of `files` -- confirm.
# Quoted so the timestamp stays a string instead of being parsed as a date.
created_at: '2026-01-31 10:58:43'
author: undertheseanlp