"""Streamlit app that labels news text as REAL or FAKE with pre-trained models.

The script loads a saved TF-IDF vectorizer and four saved models, lets the
user pick a model in the sidebar, and shows a prediction for the entered
text. A second tab displays static evaluation images for each model.
"""
import joblib
import streamlit as st

VECTORIZER_PATH = "vectorizer.pkl"

# Display name -> pickle file. Dict order is the order shown in the selectbox.
MODEL_PATHS = {
    "Logistic Regression": "logistic_regression_model.pkl",
    "Naive Bayes": "naive_bayes_model.pkl",
    "K-Means Clustering": "kmeans_model.pkl",
    "Hierarchical Clustering": "hierarchical_model.pkl",
}

# (section header, images shown under that header) for the evaluation tab.
EVALUATION_SECTIONS = (
    ("Naive Bayes", ("naiveBayes.png", "Naïve Bayes_classification_report.png")),
    (
        "Logistic Regression",
        ("logisticRegression.png", "Logistic Regression_classification_report.png"),
    ),
    (
        "K Means Clustering",
        ("KMeans.png", "K-Means Clustering_classification_report.png"),
    ),
    (
        "Hierarchical Clustering",
        ("hierarchical.png", "Hierarchical Clustering_classification_report.png"),
    ),
)


@st.cache_resource
def load_artifacts():
    """Load the vectorizer and every model from disk.

    Streamlit re-executes this script on each widget interaction; caching
    the loaded objects avoids re-reading all five pickle files every time.

    Returns:
        A ``(vectorizer, models)`` tuple where ``models`` maps each display
        name in ``MODEL_PATHS`` to its loaded object.
    """
    # NOTE: joblib.load unpickles its input; only load files you trust.
    loaded_vectorizer = joblib.load(VECTORIZER_PATH)
    loaded_models = {name: joblib.load(path) for name, path in MODEL_PATHS.items()}
    return loaded_vectorizer, loaded_models


vectorizer, models = load_artifacts()

# Streamlit UI
st.title("📰 Fake News Detection App")
st.sidebar.title("Select Model")
selected_model_name = st.sidebar.selectbox("Choose a model:", list(models.keys()))

# Tabs for Application & Model Evaluation
app, model_eval = st.tabs(["Application", "Model Evaluation"])

with app:
    user_input = st.text_area("Enter the news text:", height=200)

    if st.button("Predict"):
        if not user_input.strip():
            st.warning("⚠️ Please enter some text to predict.")
        else:
            selected_model = models[selected_model_name]
            if not hasattr(selected_model, "predict"):
                # Some estimators (e.g. scikit-learn's agglomerative
                # clustering) cannot label unseen samples; report that
                # instead of crashing with an AttributeError.
                st.error(
                    "The selected model does not support predicting on new "
                    "text. Please choose another model."
                )
            else:
                # Convert the input text to TF-IDF features, then predict.
                input_vectorized = vectorizer.transform([user_input])
                prediction = selected_model.predict(input_vectorized)

                # NOTE(review): label 1 is treated as REAL for every model.
                # For the clustering models this presumably is a cluster
                # index -- confirm it lines up with REAL in training.
                result = "REAL" if prediction[0] == 1 else "FAKE"
                st.subheader(f"Prediction: {result}")

with model_eval:
    st.header("Model Evaluation")
    st.write(
        "Model comparison is essential in machine learning to ensure that "
        "the chosen model provides the best possible performance for a "
        "given task. Different models have varying strengths and weaknesses "
        "depending on the dataset, complexity, and generalization capability."
    )
    st.write(
        "The Fake News Detection model was trained to classify news as "
        "'FAKE' or 'REAL'. The dataset was taken from Kaggle."
    )
    st.write(
        "Dataset by Rajat Kumar: "
        "[Kaggle Link](https://www.kaggle.com/datasets/rajatkumar30/fake-news)"
    )

    for section_header, image_files in EVALUATION_SECTIONS:
        st.header(section_header)
        for image_file in image_files:
            st.image(image_file)

    st.header("Results Comparison")
    st.write(
        "Based on the confusion matrix and evaluation metrics, we can assume "
        "that out of the four unsupervised and supervised learning "
        "classification algorithms chosen, Logistic Regression performs the "
        "best using this dataset"
    )