# Shopee_Sentiment_Analysis / src / build_features.py
# Hosting-page metadata captured with this file (not part of the program):
#   author avatar: cngchis; commit "init space" (1946eb0); size 383 bytes.
from sklearn.feature_extraction.text import TfidfVectorizer
def build_features(texts, max_features=5000, ngram_range=(1, 2)):
    """Fit a TF-IDF vectorizer on ``texts`` and vectorize them in one pass.

    Args:
        texts: Iterable of raw text documents to learn from and transform.
        max_features: Forwarded to ``TfidfVectorizer`` as ``max_features``.
        ngram_range: Forwarded to ``TfidfVectorizer`` as ``ngram_range``.

    Returns:
        A ``(features, vectorizer)`` tuple: the result of ``fit_transform``
        on ``texts`` and the fitted vectorizer, so the same fitted instance
        can be reused on other texts later.
    """
    tfidf_options = {
        "max_features": max_features,
        "ngram_range": ngram_range,
    }
    tfidf = TfidfVectorizer(**tfidf_options)
    # fit_transform runs first; the now-fitted instance is returned with it.
    return tfidf.fit_transform(texts), tfidf
def transform_features(texts, vectorizer):
    """Vectorize ``texts`` with an existing ``vectorizer``.

    Args:
        texts: Documents to convert; passed unchanged to the vectorizer.
        vectorizer: Object exposing ``transform(texts)``; presumably the
            fitted vectorizer returned by ``build_features`` (this function
            does not fit it).

    Returns:
        Whatever ``vectorizer.transform(texts)`` returns.
    """
    features = vectorizer.transform(texts)
    return features