| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from textblob import TextBlob | |
| import numpy as np | |
| from scipy.sparse import hstack | |
def create_features(df, tfidf=None, *, max_features=1000):
    """Build a sparse feature matrix from the ``clean_text`` column of *df*.

    The matrix stacks, left to right: the TF-IDF columns, one column holding
    the whitespace-token count of each text, and one column holding the
    TextBlob sentiment polarity of each text.

    Side effect: the two hand-crafted features are also written back to *df*
    as the columns ``ticket_length`` and ``sentiment`` (overwriting them if
    they already exist).

    Args:
        df: DataFrame with a ``clean_text`` column. Missing values in that
            column are treated as empty strings.
        tfidf: An already-fitted ``TfidfVectorizer``. When given, it is only
            used to ``transform`` the text, so validation / inference data is
            projected into the same vocabulary that was learned on the
            training data. When ``None`` (the default), a new vectorizer is
            created and fitted on ``df['clean_text']``.
        max_features: Vocabulary cap for a newly created vectorizer. Ignored
            when *tfidf* is supplied.

    Returns:
        A ``(X_features, tfidf)`` tuple: the stacked sparse matrix and the
        vectorizer that produced its TF-IDF columns (the one passed in, or
        the newly fitted one).
    """
    # A NaN/None entry would crash str.split() below; treat it as an empty
    # document so one bad row does not abort the whole batch.
    texts = df['clean_text'].fillna('').astype(str)

    if tfidf is None:
        # Training path: learn vocabulary and IDF weights from this data.
        tfidf = TfidfVectorizer(max_features=max_features)
        X_tfidf = tfidf.fit_transform(texts)
    else:
        # Inference path: reuse the fitted vocabulary so the column layout
        # matches what the downstream model was trained on.
        X_tfidf = tfidf.transform(texts)

    df['ticket_length'] = texts.apply(lambda text: len(text.split()))
    df['sentiment'] = texts.apply(
        lambda text: TextBlob(text).sentiment.polarity
    )

    # hstack needs 2-D blocks, hence the reshape of each 1-D feature column.
    X_features = hstack([
        X_tfidf,
        df['ticket_length'].to_numpy().reshape(-1, 1),
        df['sentiment'].to_numpy().reshape(-1, 1),
    ])
    return X_features, tfidf