File size: 383 Bytes
1946eb0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | from sklearn.feature_extraction.text import TfidfVectorizer
def build_features(texts, max_features=5000, ngram_range=(1, 2)):
    """Fit a TF-IDF vectorizer on *texts* and vectorize them in one step.

    Args:
        texts: Iterable of raw text documents to learn the vocabulary from.
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``.
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``.

    Returns:
        A ``(matrix, vectorizer)`` pair: the result of ``fit_transform`` on
        *texts*, and the fitted vectorizer so the same vocabulary and
        weighting can be reused on other documents later.
    """
    tfidf = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
    # Fit and transform together so the training texts are processed once.
    matrix = tfidf.fit_transform(texts)
    return matrix, tfidf
def transform_features(texts, vectorizer):
    """Vectorize *texts* with an existing *vectorizer*.

    Args:
        texts: Documents to pass to the vectorizer.
        vectorizer: Object exposing ``transform(texts)``; presumably the
            fitted vectorizer returned by ``build_features``.

    Returns:
        Whatever ``vectorizer.transform(texts)`` returns.
    """
    features = vectorizer.transform(texts)
    return features