# Identity of the trained model described by this metadata file.
model:
  name: Vietnamese Word Segmentation
  # Free-form version label; parsed as a string, not a number.
  version: python-crfsuite-v1
  type: CRF (Conditional Random Field)
  # Library named as the training/inference framework.
  framework: python-crfsuite
  # Label scheme name. Presumably each syllable is tagged B/I/O to mark
  # word boundaries — TODO confirm against the training code.
  tagging_scheme: BIO
# Data, hyperparameters and cost of the training run.
training:
  # Dataset identifier; looks like an "<owner>/<name>" hub id — verify
  # where it is hosted.
  dataset: undertheseanlp/UDD-1
  # Split sizes, given as sentence counts and syllable counts.
  train_sentences: 18282
  train_syllables: 563134
  # NOTE(review): validation and test both list 859 sentences while their
  # syllable counts differ (27170 vs 26132) — confirm this is not a
  # copy error.
  val_sentences: 859
  val_syllables: 27170
  test_sentences: 859
  test_syllables: 26132
  # Trainer settings. Presumably CRFsuite's c1 (L1 regularization
  # coefficient), c2 (L2 regularization coefficient) and iteration cap —
  # TODO confirm against the training script.
  hyperparameters:
    c1: 1.0
    c2: 0.001
    max_iterations: 200
  # Duration in seconds; presumably wall-clock training time — TODO confirm
  # whether data loading/evaluation is included.
  duration_seconds: 154.39
# Evaluation scores as fractions in [0, 1].
# NOTE(review): the split these were measured on is not recorded here;
# presumably the test split — TODO confirm.
performance:
  # Syllable-level (per-tag) metrics.
  syllable_accuracy: 0.9889
  syllable_f1: 0.9889
  # Word-level metrics. Values appear rounded to four decimals, so 0.98
  # likely stands for 0.9800 — verify against the raw evaluation output.
  word_precision: 0.98
  word_recall: 0.9799
  word_f1: 0.98
# Host details recorded for reproducibility; presumably the machine the
# training run executed on — TODO confirm.
environment:
  platform: Linux
  cpu_model: AMD EPYC 7713 64-Core Processor
  # Three-component version, so YAML parses it as a string (not a float).
  python_version: 3.12.3
# Artifacts associated with this model.
# NOTE(review): both paths are relative; presumably resolved against the
# directory containing this file — TODO confirm.
files:
  # Serialized model file name.
  model: model.crfsuite
  # Configuration file, three directory levels up from the base directory.
  config: ../../../configs/word_segmentation.yaml
# Creation timestamp. Quoted so it stays a plain string rather than being
# parsed as a YAML timestamp; no timezone is recorded.
created_at: '2026-01-31 10:58:43'
# Author/owner identifier for this model.
author: undertheseanlp