CristopherWVSU's picture
Update app.py
28e5d1f verified
import streamlit as st
import joblib
@st.cache_resource
def _load_artifact(path):
    """Load a joblib-serialized artifact from ``path`` and cache it.

    Streamlit re-executes this script from the top on every widget
    interaction. ``st.cache_resource`` keeps the deserialized object in
    memory so the pickle files are read once instead of on every rerun.
    """
    # NOTE: joblib.load unpickles arbitrary objects; only point this at
    # trusted files.
    return joblib.load(path)


# Load the saved TF-IDF vectorizer
vectorizer = _load_artifact("vectorizer.pkl")

# Load saved models, keyed by the display name offered in the sidebar
models = {
    "Logistic Regression": _load_artifact("logistic_regression_model.pkl"),
    "Naive Bayes": _load_artifact("naive_bayes_model.pkl"),
    "K-Means Clustering": _load_artifact("kmeans_model.pkl"),
    "Hierarchical Clustering": _load_artifact("hierarchical_model.pkl"),
}
# Streamlit UI: page title plus a sidebar selector for the model to use
st.title("📰 Fake News Detection App")
st.sidebar.title("Select Model")
selected_model_name = st.sidebar.selectbox("Choose a model:", list(models))

# Tabs for Application & Model Evaluation
app, model_eval = st.tabs(["Application", "Model Evaluation"])

with app:
    # Text input area
    user_input = st.text_area("Enter the news text:", height=200)

    if st.button("Predict"):
        if not user_input.strip():
            # Nothing but whitespace was entered; ask for real input.
            st.warning("⚠️ Please enter some text to predict.")
        else:
            # Select the chosen model
            selected_model = models[selected_model_name]

            if not hasattr(selected_model, "predict"):
                # Some estimators (e.g. scikit-learn's agglomerative /
                # hierarchical clustering) can only label the data they were
                # fitted on and expose no predict(); report that instead of
                # crashing with AttributeError.
                st.error(
                    f"The '{selected_model_name}' model cannot classify new "
                    "text because it does not provide a predict() method."
                )
            else:
                # Convert input text to TF-IDF features
                input_vectorized = vectorizer.transform([user_input])

                # Make prediction
                prediction = selected_model.predict(input_vectorized)

                # Display result: label 1 is treated as REAL, anything else
                # as FAKE.
                # NOTE(review): for the clustering models the predicted value
                # is presumably a cluster index, which may not line up with
                # the 1 == REAL convention - confirm against training code.
                result = "REAL" if prediction[0] == 1 else "FAKE"
                st.subheader(f"Prediction: {result}")
with model_eval:
    # Introductory text for the evaluation tab.
    st.header("Model Evaluation")
    intro_paragraphs = (
        "Model comparison is essential in machine learning to ensure that the chosen model provides the best possible performance for a given task. Different models have varying strengths and weaknesses depending on the dataset, complexity, and generalization capability.",
        "The Fake News Detection model was trained to classify news as 'FAKE' or 'REAL'. The dataset was taken from Kaggle.",
        "Dataset by Rajat Kumar: [Kaggle Link](https://www.kaggle.com/datasets/rajatkumar30/fake-news)",
    )
    for paragraph in intro_paragraphs:
        st.write(paragraph)

    # One section per model: a heading followed by its two image files,
    # rendered in the order listed here.
    evaluation_sections = (
        (
            "Naive Bayes",
            ("naiveBayes.png", "Naïve Bayes_classification_report.png"),
        ),
        (
            "Logistic Regression",
            ("logisticRegression.png", "Logistic Regression_classification_report.png"),
        ),
        (
            "K Means Clustering",
            ("KMeans.png", "K-Means Clustering_classification_report.png"),
        ),
        (
            "Hierarchical Clustering",
            ("hierarchical.png", "Hierarchical Clustering_classification_report.png"),
        ),
    )
    for section_title, image_files in evaluation_sections:
        st.header(section_title)
        for image_file in image_files:
            st.image(image_file)

    # Closing summary.
    st.header("Results Comparison")
    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the four unsupervised and supervised learning classification algorithms chosen, Logistic Regression performs the best using this dataset")