"""Train a TF-IDF + Passive-Aggressive classifier on ``news.csv``.

Running this file as a script reads the dataset, trains and evaluates the
model, prints the metrics, and pickles the fitted model and vectorizer.
"""

import pickle

import pandas as pd

DATA_PATH = "news.csv"
MODEL_PATH = "fake_news_model.pkl"
VECTORIZER_PATH = "tfidf_vectorizer.pkl"


def prepare_dataset(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``content`` column and unusable rows removed.

    ``content`` is ``title + " " + text`` when a ``title`` column exists,
    otherwise just ``text``; missing cells are treated as empty strings.
    Rows whose label is missing, or whose content is empty after stripping
    whitespace, are dropped. The input frame is not modified and the
    surviving rows keep their original index.

    Raises:
        KeyError: if the ``text`` or ``label`` column is absent.
    """
    missing = [col for col in ("text", "label") if col not in df.columns]
    if missing:
        raise KeyError(f"dataset is missing required column(s): {missing}")

    # Cast to str so numeric-looking cells can still be concatenated.
    text = df["text"].fillna("").astype(str)
    if "title" in df.columns:
        content = df["title"].fillna("").astype(str) + " " + text
    else:
        content = text

    prepared = df.assign(content=content)
    # NaNs were already replaced by "", so dropna() on ``content`` can never
    # remove anything; test for blank text explicitly instead.
    has_content = prepared["content"].str.strip().ne("")
    has_label = prepared["label"].notna()
    return prepared.loc[has_content & has_label]


def train_and_evaluate(X, y):
    """Fit the vectorizer and classifier, print test metrics, return both.

    Performs a stratified 80/20 split with a fixed seed, fits a TF-IDF
    vectorizer on the training portion only, trains the classifier, and
    prints the accuracy and classification report for the held-out portion.

    Returns:
        A ``(model, vectorizer)`` pair of fitted estimators.
    """
    # Imported here so the data-preparation helper stays importable without
    # scikit-learn being installed.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import PassiveAggressiveClassifier
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    vectorizer = TfidfVectorizer(
        stop_words="english",
        max_df=0.75,
        min_df=2,
        ngram_range=(1, 2),
    )
    # Fit on the training split only; the test split is merely transformed.
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)

    model = PassiveAggressiveClassifier(max_iter=1000, random_state=42)
    model.fit(X_train_tfidf, y_train)

    y_pred = model.predict(X_test_tfidf)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("\nClassification Report:\n")
    print(classification_report(y_test, y_pred))

    return model, vectorizer


def save_pickle(obj, path) -> None:
    """Serialize *obj* to *path* with :mod:`pickle`."""
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def main() -> None:
    """Run the full pipeline: load, clean, train, evaluate, and save."""
    df = prepare_dataset(pd.read_csv(DATA_PATH))

    model, vectorizer = train_and_evaluate(df["content"], df["label"])

    save_pickle(model, MODEL_PATH)
    save_pickle(vectorizer, VECTORIZER_PATH)
    print("Model and vectorizer saved successfully!")


if __name__ == "__main__":
    main()