import os
import sys
import subprocess
import importlib.util

# Install required libraries only if they are actually missing.
# NOTE(review): installing packages at app runtime is fragile — prefer a
# requirements.txt. Kept for backward compatibility with the script's
# self-bootstrapping behavior, but now (a) skipped when already installed
# and (b) targeted at the *current* interpreter via `sys.executable -m pip`
# instead of whatever `pip` happens to be on PATH.
_REQUIRED = {
    "streamlit": "streamlit",
    "pandas": "pandas",
    "numpy": "numpy",
    "scikit-learn": "sklearn",  # pip name differs from import name
    "nltk": "nltk",
}
for _pip_name, _module_name in _REQUIRED.items():
    if importlib.util.find_spec(_module_name) is None:
        subprocess.run([sys.executable, "-m", "pip", "install", _pip_name], check=False)

import streamlit as st
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.corpus import stopwords

st.image("innomatics-footer-logo.webp")
st.image("fake_logo.jpg")

# Download NLTK stopwords (quiet=True avoids re-download log spam on each rerun).
nltk.download("stopwords", quiet=True)
stop_words = set(stopwords.words("english"))


# Load Datasets
@st.cache_data
def load_data():
    """Load the fake/real news CSVs, label them (0 = fake, 1 = real),
    merge, shuffle, and cap the dataset at 10k rows for speed.

    Returns:
        pd.DataFrame: shuffled frame with a ``label`` column added.
    """
    df_fake = pd.read_csv("Fake.csv")
    df_real = pd.read_csv("True.csv")

    # Assign labels
    df_fake["label"] = 0  # Fake News
    df_real["label"] = 1  # Real News

    # Merge datasets
    df = pd.concat([df_fake, df_real], ignore_index=True)
    # sample() both shuffles and caps the size; min() guards against a
    # ValueError when the merged data has fewer than 10k rows.
    df = df.sample(n=min(10000, len(df)), random_state=27).reset_index(drop=True)
    return df


df = load_data()


# Text Preprocessing Function
def preprocess_text(text):
    """Lowercase *text* and drop English stopwords (whitespace tokenization)."""
    text = text.lower()
    text = " ".join(word for word in text.split() if word not in stop_words)
    return text


df["clean_text"] = df["text"].astype(str).apply(preprocess_text)

# TF-IDF Vectorization — top 2000 terms keeps training fast.
vectorizer = TfidfVectorizer(max_features=2000)
X = vectorizer.fit_transform(df["clean_text"])

# Target variable
y = df["label"].values

# Split Data — stratify keeps the fake/real ratio identical in both splits,
# so the reported metrics are not skewed by an unlucky shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Candidate models, keyed by the label shown in the UI dropdown.
models = {
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": MultinomialNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}

# Streamlit App UI
# ---- Streamlit App UI -------------------------------------------------------
# NOTE(review): the original HTML inside these markdown strings was garbled in
# transit; reconstructed here as minimal heading markup that preserves the
# original visible text.
st.markdown(
    "<h1 style='text-align: center;'>📰 Fake News Detection App</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<p style='text-align: center;'>Select a machine learning model and enter "
    "a news article to predict if it's Real or Fake.</p>",
    unsafe_allow_html=True,
)

# Model Selection Dropdown — a real label (visually collapsed) instead of ""
# avoids Streamlit's empty-label accessibility warning.
st.markdown("<h3>🔍 Choose a Machine Learning Model:</h3>", unsafe_allow_html=True)
selected_model = st.selectbox(
    "Machine learning model", list(models.keys()), label_visibility="collapsed"
)


@st.cache_resource
def train_model(name):
    """Fit the classifier registered under *name* on the training split.

    Cached per model name so the app does not retrain on every widget
    interaction / script rerun — only the first selection of each model pays
    the training cost.
    """
    clf = models[name]
    clf.fit(X_train, y_train)
    return clf


# Train (or fetch cached) Selected Model
model = train_model(selected_model)

# Predictions on the held-out split
y_pred = model.predict(X_test)

# Classification Report
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display Model Performance (plain text — unsafe_allow_html is pointless here)
st.markdown("<h3>📊 Model Performance</h3>", unsafe_allow_html=True)
st.write(f"Accuracy: {accuracy:.4f}")
st.write(f"Precision: {precision:.4f}")
st.write(f"Recall: {recall:.4f}")
st.write(f"F1 Score: {f1:.4f}")

# User Input
st.markdown("<h3>📝 Enter News Article:</h3>", unsafe_allow_html=True)
news_input = st.text_area("News article", height=200, label_visibility="collapsed")


# Function to Predict News Authenticity
def predict_news(article, model):
    """Classify *article* with *model*; return a human-readable verdict."""
    clean_text = preprocess_text(article)
    # Keep the TF-IDF vector sparse — every model above accepts sparse input,
    # and .toarray() would only waste memory.
    text_features = vectorizer.transform([clean_text])
    prediction = model.predict(text_features)[0]
    return "🟢 Real News" if prediction == 1 else "🔴 Fake News"


if st.button("Check News Authenticity"):
    if not news_input.strip():
        st.warning("⚠ Please enter a news article before clicking the button.")
    else:
        result = predict_news(news_input, model)
        st.markdown("<h3>Prediction Result:</h3>", unsafe_allow_html=True)
        st.markdown(
            f"<h2 style='text-align: center;'>{result}</h2>",
            unsafe_allow_html=True,
        )