File size: 383 Bytes
1946eb0
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from sklearn.feature_extraction.text import TfidfVectorizer


def build_features(texts, max_features=5000, ngram_range=(1, 2)):
    """Fit a TF-IDF vectorizer on ``texts`` and vectorize them in one step.

    Args:
        texts: Iterable of raw text documents, passed straight to
            ``TfidfVectorizer.fit_transform``.
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``.
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``.

    Returns:
        A ``(features, vectorizer)`` tuple: the result of ``fit_transform``
        on ``texts`` and the fitted vectorizer. Keep the vectorizer so that
        later data can be transformed with the same fitted state.
    """
    tfidf_options = {
        "max_features": max_features,
        "ngram_range": ngram_range,
    }
    tfidf = TfidfVectorizer(**tfidf_options)
    features = tfidf.fit_transform(texts)
    return features, tfidf


def transform_features(texts, vectorizer):
    """Vectorize ``texts`` with an existing vectorizer, without refitting.

    Args:
        texts: Documents to vectorize; passed unchanged to
            ``vectorizer.transform``.
        vectorizer: Any object exposing a ``transform(texts)`` method
            (presumably the vectorizer returned by ``build_features``).

    Returns:
        Whatever ``vectorizer.transform(texts)`` returns.
    """
    features = vectorizer.transform(texts)
    return features