File size: 687 Bytes
73173ad
 
27e6434
73173ad
27e6434
 
 
 
 
 
 
 
 
 
 
73173ad
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Training backend and its optimisation hyperparameters.
trainer: python-crfsuite
# NOTE(review): presumably CRFsuite's L1 regularisation coefficient —
# confirm against the training script that reads this file.
c1: 0.5
# NOTE(review): presumably CRFsuite's L2 regularisation coefficient — confirm.
c2: 0.001
# NOTE(review): presumably the cap on optimiser iterations — confirm.
max_iterations: 300

# Feature groups for the ablation study.
# Each boolean key below toggles one feature group (G1-G8); set a group to
# false to disable it. The trailing comment on each key lists the feature
# templates that belong to that group.
# NOTE(review): S[i] presumably denotes the token at offset i from the
# current position (S[0] = current token) — confirm against the feature
# extractor.
features:
  form: true        # G1: S[0], S[0].lower
  type: true        # G2: S[0].istitle, S[0].isupper, S[0].isdigit, S[0].ispunct, S[0].len
  morphology: true  # G3: S[0].prefix2, S[0].suffix2
  left: true        # G4: S[-1], S[-1].lower, S[-2], S[-2].lower
  right: true       # G5: S[1], S[1].lower, S[2], S[2].lower
  bigram: true      # G6: S[-1,0], S[0,1]
  trigram: true     # G7: S[-1,0,1]
  dictionary: true  # G8: Dictionary lookup (external Viet74K: +0.26% word F1)
  # Not a boolean toggle: a string option listed alongside the groups.
  # Documented values: "training", "external", "combined".
  # NOTE(review): presumably selects which word list the G8 lookup uses and
  # is only consulted when `dictionary` is true — confirm against the loader.
  dictionary_source: external