File size: 402 Bytes
f3287af
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

def build_pipeline(random_state=42):
    """Assemble an unfitted TF-IDF + logistic-regression pipeline.

    The pipeline has two named steps:

    * ``"tfidf"`` -- a ``TfidfVectorizer`` configured with
      ``ngram_range=(1, 2)``, ``min_df=1`` and ``max_df=0.9``.
    * ``"clf"`` -- a ``LogisticRegression`` with ``max_iter=1000``.

    Args:
        random_state: Seed forwarded to ``LogisticRegression``.
            Defaults to 42.

    Returns:
        A new ``Pipeline`` instance; nothing is fitted here.
    """
    steps = [
        (
            "tfidf",
            TfidfVectorizer(ngram_range=(1, 2), min_df=1, max_df=0.9),
        ),
        (
            "clf",
            LogisticRegression(random_state=random_state, max_iter=1000),
        ),
    ]
    return Pipeline(steps)