# CRF trainer backend and its hyperparameters.
# NOTE(review): c1 / c2 are presumably the L1 / L2 regularization
# coefficients handed to CRFsuite — confirm against the training script.
trainer: python-crfsuite
c1: 0.5
c2: 0.001
max_iterations: 300

# Feature groups for ablation study.
# Set any group to false to disable it.
features:
  form: true  # G1: S[0], S[0].lower
  type: true  # G2: S[0].istitle, S[0].isupper, S[0].isdigit, S[0].ispunct, S[0].len
  morphology: true  # G3: S[0].prefix2, S[0].suffix2
  left: true  # G4: S[-1], S[-1].lower, S[-2], S[-2].lower
  right: true  # G5: S[1], S[1].lower, S[2], S[2].lower
  bigram: true  # G6: S[-1,0], S[0,1]
  trigram: true  # G7: S[-1,0,1]
  dictionary: true  # G8: Dictionary lookup (external Viet74K: +0.26% word F1)
  # Allowed values: "training", "external", "combined".
  # NOTE(review): the original indentation was lost; this key is assumed to
  # sit beside the `dictionary` toggle — confirm where the loader reads it.
  dictionary_source: external