# Fake_News_Detector / train_model.py
# abhijitdas2821's picture
# Update train_model.py
# a5bbe36 verified
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, classification_report
# Load the labelled news dataset. The code below reads the "text" and "label"
# columns; "title" is optional.
df = pd.read_csv("news.csv")

# Combine title + text into one "content" string per article. Missing cells
# are replaced with "" first so the concatenation never produces NaN.
if "title" in df.columns:
    df["content"] = df["title"].fillna("") + " " + df["text"].fillna("")
else:
    df["content"] = df["text"].fillna("")

# Drop unusable rows. "content" can no longer be NaN after the fillna() calls
# above, so dropna() on it would silently keep articles that have no text at
# all; those are detected by looking for blank strings instead.
df.dropna(subset=["label"], inplace=True)
has_text = df["content"].astype(str).str.strip() != ""
df = df[has_text]
# Features and labels: the combined article text is the model input and the
# "label" column is the prediction target.
X, y = df["content"], df["label"]

# Split: hold out 20% of the rows for evaluation. Stratifying on y keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
# TF-IDF: unigrams and bigrams, with English stop words removed. Terms that
# appear in more than 75% of the documents or in fewer than 2 documents are
# excluded from the vocabulary.
tfidf_options = {
    "stop_words": "english",
    "max_df": 0.75,
    "min_df": 2,
    "ngram_range": (1, 2),
}
vectorizer = TfidfVectorizer(**tfidf_options)

# The vocabulary and IDF weights are learned from the training split only;
# the test split is merely projected onto them.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)
# Model: fit() returns the estimator itself, so construction and training
# can be chained into a single assignment.
model = PassiveAggressiveClassifier(max_iter=1000, random_state=42).fit(
    X_train_tfidf, y_train
)

# Evaluate on the held-out split.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = classification_report(y_test, y_pred)
# sep="\n" reproduces the line break that separates the header from the
# report when they are printed with two consecutive print() calls.
print("\nClassification Report:\n", report, sep="\n")
# Save model and vectorizer: each object is pickled to its own file, the
# classifier first and the fitted TF-IDF vectorizer second.
artifacts = (
    ("fake_news_model.pkl", model),
    ("tfidf_vectorizer.pkl", vectorizer),
)
for filename, artifact in artifacts:
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("Model and vectorizer saved successfully!")