SignalMod / configs /features.yaml
JonnyBP's picture
feat: add preprocessing. #3
f46289d
raw
history blame
422 Bytes
# Text preprocessing options.
# Every option must stay indented under `preprocessing`; at column 0 the
# section itself parses as null and the options become unrelated top-level keys.
# NOTE(review): the effect of each flag is inferred from its name only —
# confirm against the code that loads this file.
preprocessing:
  lowercase: true
  remove_urls: true
  remove_mentions: true
  remove_emojis: true
  remove_special_chars: true
  remove_stopwords: true
  lemmatize: true
  # Presumably the minimum token length (in characters) to keep — TODO confirm.
  min_token_length: 2
  # Presumably a language code. Quote codes that YAML 1.1 parsers read as
  # booleans (e.g. "no" for Norwegian) if this value is ever changed.
  language: en
# Vectorization settings.
# `tfidf` and `bow` are separate nested mappings. Both define max_features,
# ngram_range and min_df, so flattening them produces duplicate keys, and most
# parsers silently keep only the last value.
# NOTE(review): option names match scikit-learn's TfidfVectorizer /
# CountVectorizer parameters — presumably forwarded to them; confirm in the
# loading code.
vectorization:
  method: tfidf  # tfidf | bow | both
  # Options for the `tfidf` method.
  tfidf:
    max_features: 5000
    ngram_range: [1, 2]
    sublinear_tf: true
    min_df: 3
  # Options for the `bow` method.
  bow:
    max_features: 5000
    ngram_range: [1, 1]
    min_df: 3