|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
from sklearn.metrics import classification_report
|
|
|
import pickle
|
|
|
|
|
|
|
|
|
# Train a TF-IDF + logistic-regression text classifier that predicts the
# complaint "Product" from the free-text narrative, print hold-out metrics,
# and persist the fitted (model, vectorizer) pair to "model.pkl".

# Load the complaints export and strip surrounding whitespace from the header
# names before selecting columns by name.
df = pd.read_csv("consumer_complaints_100k.csv")
df.columns = df.columns.str.strip()

# Keep only the text and label columns; drop rows where either is missing.
df = df[['Consumer complaint narrative', 'Product']].dropna()

X = df['Consumer complaint narrative']
y = df['Product']

# Split the raw text BEFORE vectorising. Fitting TF-IDF on the full corpus
# would let the vocabulary and IDF weights see the hold-out documents, which
# leaks test information into training and inflates the reported metrics.
# The same test_size/random_state on the same number of rows selects the same
# rows as splitting the vectorised matrix would.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn vocabulary/IDF from the training split only, then apply that fitted
# transform unchanged to the hold-out split.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate on the hold-out split.
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# Persist the classifier together with its vectorizer so inference can apply
# the exact same feature mapping. NOTE: pickle files must only be loaded from
# trusted sources, since unpickling can execute arbitrary code.
with open("model.pkl", "wb") as f:
    pickle.dump((model, vectorizer), f)

print("\n✅ Model başarıyla eğitildi ve 'model.pkl' dosyasına kaydedildi.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|