# File: tre-1/src/conf/model/default.yaml
# Author (per page header): rain1024
# Commit 73173ad: Add external dictionary features for VLSP 2013 word
# segmentation
# Training backend and its optimisation settings.
# NOTE(review): c1, c2 and max_iterations share their names with CRFsuite
# trainer parameters (L1 coefficient, L2 coefficient, iteration cap);
# presumably they are passed through to python-crfsuite as-is — confirm
# against the code that loads this file.
trainer: python-crfsuite
c1: 0.5
c2: 0.001
max_iterations: 300
# Feature groups for the ablation study.
# Each key below toggles one group (G1-G8) of feature templates; set a
# group to false to disable it for a run.
# NOTE(review): S[i] presumably denotes the token at offset i relative to
# the current position — confirm against the feature extractor.
features:
  # G1: S[0], S[0].lower
  form: true
  # G2: S[0].istitle, S[0].isupper, S[0].isdigit, S[0].ispunct, S[0].len
  type: true
  # G3: S[0].prefix2, S[0].suffix2
  morphology: true
  # G4: S[-1], S[-1].lower, S[-2], S[-2].lower
  left: true
  # G5: S[1], S[1].lower, S[2], S[2].lower
  right: true
  # G6: S[-1,0], S[0,1]
  bigram: true
  # G7: S[-1,0,1]
  trigram: true
  # G8: Dictionary lookup (external Viet74K: +0.26% word F1)
  dictionary: true
# Which dictionary the dictionary lookup feature group (G8) uses.
# Allowed values: "training", "external", "combined".
# NOTE(review): indentation is not visible in this copy of the file —
# confirm whether this key is top-level or belongs under `features`.
dictionary_source: external # "training", "external", "combined"