File size: 638 Bytes
b5c1242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
import numpy as np
from scipy.sparse import hstack

def create_features(df, tfidf=None, max_features=1000):
    """Build a sparse feature matrix from the ``clean_text`` column of *df*.

    The matrix is the horizontal concatenation of, in order:

    1. the TF-IDF representation of ``df['clean_text']``,
    2. one column with the number of whitespace-separated tokens per row,
    3. one column with the TextBlob sentiment polarity per row.

    Side effect: the columns ``ticket_length`` and ``sentiment`` are added to
    (or overwritten on) *df* in place.

    Args:
        df: DataFrame-like object with a ``clean_text`` column of strings.
        tfidf: Optional already-fitted vectorizer. When given, it is only
            used to ``transform`` the text, so the TF-IDF columns line up
            with the matrix it was fitted on (use this for validation or
            inference data). When ``None``, a new ``TfidfVectorizer`` is
            created and fitted on ``df['clean_text']``.
        max_features: Vocabulary size limit for a newly created vectorizer.
            Ignored when *tfidf* is supplied.

    Returns:
        A ``(X_features, tfidf)`` tuple: the stacked sparse feature matrix
        and the vectorizer that produced its TF-IDF columns.
    """
    if tfidf is None:
        # Training path: learn the vocabulary and IDF weights from this data.
        tfidf = TfidfVectorizer(max_features=max_features)
        X_tfidf = tfidf.fit_transform(df['clean_text'])
    else:
        # Reuse path: refitting here would change the vocabulary, so the
        # columns would no longer match the matrix the vectorizer was
        # fitted on.
        X_tfidf = tfidf.transform(df['clean_text'])

    df['ticket_length'] = df['clean_text'].apply(lambda x: len(x.split()))
    df['sentiment'] = df['clean_text'].apply(lambda x: TextBlob(x).sentiment.polarity)

    # Each hand-crafted feature is reshaped to a single column so it can be
    # stacked next to the TF-IDF matrix.
    X_features = hstack([
        X_tfidf,
        np.array(df['ticket_length']).reshape(-1, 1),
        np.array(df['sentiment']).reshape(-1, 1),
    ])
    return X_features, tfidf