import importlib
import importlib.util
import os
import subprocess
import sys

# Install required libraries only if they are actually missing.
# Streamlit re-executes this script on every user interaction, so an
# unconditional `pip install` would spawn five subprocesses per rerun.
required_libs = ["streamlit", "pandas", "numpy", "scikit-learn", "nltk"]

# pip distribution names that differ from their importable module name.
_IMPORT_NAMES = {"scikit-learn": "sklearn"}

for lib in required_libs:
    if importlib.util.find_spec(_IMPORT_NAMES.get(lib, lib)) is None:
        # Use the running interpreter's pip so the package lands in the same
        # environment the imports below resolve against. Best-effort: a failed
        # install surfaces as an ImportError on the import lines that follow.
        subprocess.run([sys.executable, "-m", "pip", "install", lib], check=False)
        # Let the import system notice packages installed after start-up.
        importlib.invalidate_caches()

import streamlit as st
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.corpus import stopwords

st.image("innomatics-footer-logo.webp")
st.image("fake_logo.jpg")

# Load the NLTK English stopwords, downloading the corpus only when it is not
# already available locally (NLTK raises LookupError for a missing resource).
try:
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))


# Load Datasets
@st.cache_data
def load_data() -> pd.DataFrame:
    """Load, label, merge and sub-sample the fake/real news datasets.

    Reads ``Fake.csv`` and ``True.csv`` from the working directory, adds a
    ``label`` column (0 = fake, 1 = real), concatenates both frames and
    returns a shuffled sample of at most 10,000 rows with a fresh index.

    Returns:
        The sampled, shuffled DataFrame including the ``label`` column.
    """
    df_fake = pd.read_csv("Fake.csv")
    df_real = pd.read_csv("True.csv")

    # Assign labels
    df_fake["label"] = 0  # Fake News
    df_real["label"] = 1  # Real News

    # Merge datasets
    df = pd.concat([df_fake, df_real], ignore_index=True)

    # Shuffle and cap at 10,000 rows; the cap is clamped to the frame length
    # because DataFrame.sample raises when n exceeds the number of rows.
    sample_size = min(10000, len(df))
    df = df.sample(n=sample_size, random_state=27).reset_index(drop=True)
    return df


df = load_data()


# Text Preprocessing Function
def preprocess_text(text: str) -> str:
    """Lower-case *text* and drop English stopwords.

    The text is split on whitespace; tokens found in the module-level
    ``stop_words`` set are removed and the rest are re-joined with single
    spaces.
    """
    text = text.lower()
    text = " ".join(word for word in text.split() if word not in stop_words)
    return text


df["clean_text"] = df["text"].astype(str).apply(preprocess_text)

# TF-IDF Vectorization
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split, so vocabulary/IDF statistics from the test rows leak into
# training. Left unchanged here because code further down the file may depend
# on `X` and `vectorizer` as currently built.
vectorizer = TfidfVectorizer(max_features=2000)
X = vectorizer.fit_transform(df["clean_text"])

# Target variable
y = df["label"].values

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate classifiers, keyed by the display name used for selection.
models = {
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": MultinomialNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}

# Streamlit App UI
st.markdown("
Select a machine learning model and enter a news article to predict if it's Real or Fake.
", unsafe_allow_html=True) # Model Selection Dropdown st.markdown("