# rain1024's picture
# Add trained models (POS tagger and word segmentation)
# 513f013
# raw
# history blame contribute delete
# 806 Bytes
# Metadata record for a trained word-segmentation model: model identity,
# training data and settings, evaluation scores, build environment, and
# artifact locations.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. Left flat, `model` appeared twice at the
# top level (section header and artifact file name) and every section key
# parsed as null. Confirm the two hedged placements marked further down.

# Identity of the model and the tagging approach it uses.
model:
  name: Vietnamese Word Segmentation
  version: python-crfsuite-v1
  type: CRF (Conditional Random Field)
  framework: python-crfsuite
  tagging_scheme: BIO

# Dataset, split sizes, and trainer settings used to produce the model.
training:
  dataset: undertheseanlp/UDD-1
  train_sentences: 18282
  train_syllables: 563134
  val_sentences: 859
  val_syllables: 27170
  test_sentences: 859
  test_syllables: 26132
  # presumably CRFsuite's L1 (c1) and L2 (c2) regularization coefficients
  # and its iteration cap -- verify against the training script
  hyperparameters:
    c1: 1.0
    c2: 0.001
    max_iterations: 200
  # NOTE(review): assumed to be a sibling of `hyperparameters` (a run
  # statistic, not a tunable) -- confirm against the generator
  duration_seconds: 154.39

# Evaluation scores at syllable and word level.
# NOTE(review): the split these were measured on is not stated here.
performance:
  syllable_accuracy: 0.9889
  syllable_f1: 0.9889
  word_precision: 0.98
  word_recall: 0.9799
  word_f1: 0.98

# Machine and interpreter the model was trained on.
environment:
  platform: Linux
  cpu_model: AMD EPYC 7713 64-Core Processor
  # quoted so the value stays a string even if it is ever shortened to a
  # two-part version such as 3.12, which would otherwise load as a float
  python_version: '3.12.3'

# Artifacts associated with this model.
# NOTE(review): the base directory for the relative `config` path is not
# stated here -- presumably this file's own directory; confirm.
files:
  model: model.crfsuite
  config: ../../../configs/word_segmentation.yaml

# NOTE(review): assumed to be top-level keys rather than members of `files`.
created_at: '2026-01-31 10:58:43'
author: undertheseanlp