# --- Hugging Face file-viewer residue (page chrome, not part of the module) ---
# Satyam0077's picture
# Upload 4 files
# b5c1242 verified
# raw
# history blame contribute delete
# 638 Bytes
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import numpy as np
from scipy.sparse import hstack
def create_features(df, tfidf=None, max_features=1000):
    """Build the feature matrix from the ``clean_text`` column of *df*.

    Three groups of features are stacked horizontally, in this order:

    1. TF-IDF weights of ``df['clean_text']``.
    2. ``ticket_length``: number of whitespace-separated tokens per row.
    3. ``sentiment``: TextBlob sentiment polarity per row.

    Side effect: the ``ticket_length`` and ``sentiment`` columns are written
    onto *df* in place (existing columns with those names are overwritten).

    Args:
        df: Table with a ``clean_text`` column of strings (expected to be a
            pandas DataFrame -- the column must support ``.apply``).
        tfidf: Optional, already fitted ``TfidfVectorizer``. When given, it is
            only used to ``transform`` the text, so validation / inference
            data ends up in the same column space as the training data. When
            ``None`` (the default), a new vectorizer is created and fitted on
            *df*, which is the original behaviour.
        max_features: Vocabulary cap for a newly created vectorizer. Ignored
            when *tfidf* is supplied.

    Returns:
        A ``(X_features, tfidf)`` tuple: the stacked sparse feature matrix and
        the vectorizer that produced the TF-IDF part (the one passed in, or
        the newly fitted one).
    """
    text = df['clean_text']

    if tfidf is None:
        # Training path: learn vocabulary and IDF weights from this data.
        tfidf = TfidfVectorizer(max_features=max_features)
        X_tfidf = tfidf.fit_transform(text)
    else:
        # Inference path: reuse the fitted vocabulary; refitting here would
        # produce columns that do not line up with the trained model.
        X_tfidf = tfidf.transform(text)

    df['ticket_length'] = text.apply(lambda x: len(x.split()))
    df['sentiment'] = text.apply(lambda x: TextBlob(x).sentiment.polarity)

    # The dense per-row features are reshaped to column vectors so they can be
    # appended as two extra columns to the right of the sparse TF-IDF block.
    X_features = hstack([
        X_tfidf,
        np.array(df['ticket_length']).reshape(-1, 1),
        np.array(df['sentiment']).reshape(-1, 1),
    ])
    return X_features, tfidf