Diabetes_readmissions/models/baseline_tfidf.py
MP44's picture
Upload 14 files
d51135f verified
raw
history blame contribute delete
494 Bytes
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
def build_tfidf_model(
    *,
    max_features=20000,
    ngram_range=(1, 2),
    stop_words="english",
    max_iter=1000,
    class_weight="balanced",
):
    """Build an unfitted TF-IDF + logistic-regression text pipeline.

    Called with no arguments, this returns exactly the baseline
    configuration; every hyperparameter can be overridden by keyword so the
    same factory can be reused for tuning without editing this module.

    Args:
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``.
        ngram_range: Forwarded to ``TfidfVectorizer(ngram_range=...)``; the
            default ``(1, 2)`` requests unigrams and bigrams.
        stop_words: Forwarded to ``TfidfVectorizer(stop_words=...)``.
        max_iter: Forwarded to ``LogisticRegression(max_iter=...)``.
        class_weight: Forwarded to ``LogisticRegression(class_weight=...)``.

    Returns:
        A ``Pipeline`` with two steps, named ``"tfidf"`` and ``"clf"``. The
        pipeline is not fitted; the caller is responsible for calling
        ``fit`` on it.
    """
    vectorizer = TfidfVectorizer(
        max_features=max_features,
        ngram_range=ngram_range,
        stop_words=stop_words,
    )
    classifier = LogisticRegression(
        max_iter=max_iter,
        class_weight=class_weight,
    )
    # Keep the step names stable: callers may address them via
    # ``model.named_steps[...]`` or ``tfidf__*`` / ``clf__*`` parameter keys.
    return Pipeline([
        ("tfidf", vectorizer),
        ("clf", classifier),
    ])