File size: 2,874 Bytes
8f57f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28e5d1f
8f57f3a
 
28e5d1f
 
 
 
8f57f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import joblib

@st.cache_resource
def _load_artifacts():
    """Load the TF-IDF vectorizer and all trained models from disk once.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the unpickled objects as a shared resource avoids
    re-reading the five pickle files on each rerun.

    Returns:
        A ``(vectorizer, models)`` tuple, where ``models`` maps the display
        name shown in the sidebar to the loaded estimator.
    """
    # NOTE: joblib.load unpickles arbitrary objects, so these files (resolved
    # relative to the working directory) must come from a trusted source.
    loaded_vectorizer = joblib.load("vectorizer.pkl")
    loaded_models = {
        "Logistic Regression": joblib.load("logistic_regression_model.pkl"),
        "Naive Bayes": joblib.load("naive_bayes_model.pkl"),
        "K-Means Clustering": joblib.load("kmeans_model.pkl"),
        "Hierarchical Clustering": joblib.load("hierarchical_model.pkl"),
    }
    return loaded_vectorizer, loaded_models


# Module-level names used by the rest of the script.
vectorizer, models = _load_artifacts()

# Page chrome: title, sidebar model picker, and the two content tabs.
st.title("📰 Fake News Detection App")
st.sidebar.title("Select Model")

# The picker lists the models in the order they appear in `models`.
selected_model_name = st.sidebar.selectbox(
    "Choose a model:",
    list(models),
)

# One tab for the prediction form, one for the evaluation write-up.
tab_labels = ["Application", "Model Evaluation"]
app, model_eval = st.tabs(tab_labels)



with app:
    # Free-text news article to classify.
    user_input = st.text_area("Enter the news text:", height=200)

    if st.button("Predict"):
        if not user_input.strip():
            # Only whitespace was entered; there is nothing to vectorize.
            st.warning("⚠️ Please enter some text to predict.")
        else:
            # Look up the estimator picked in the sidebar.
            selected_model = models[selected_model_name]
            predict_fn = getattr(selected_model, "predict", None)

            if not callable(predict_fn):
                # Some estimators cannot label unseen samples (for example,
                # scikit-learn's AgglomerativeClustering offers fit and
                # fit_predict but no predict). Report that to the user
                # instead of crashing the app with an AttributeError.
                st.error(
                    f"The {selected_model_name} model cannot make "
                    "predictions on new text. Please choose a different model."
                )
            else:
                # Convert the input text to features with the loaded
                # vectorizer; it is passed as a one-element list of documents.
                input_vectorized = vectorizer.transform([user_input])
                prediction = predict_fn(input_vectorized)

                # NOTE(review): assumes a predicted value of 1 means REAL.
                # For clustering models the cluster ids are presumably
                # arbitrary, so confirm this mapping against the training code.
                result = "REAL" if prediction[0] == 1 else "FAKE"
                st.subheader(f"Prediction: {result}")
with model_eval:
    st.header("Model Evaluation")

    # Introductory text, rendered as separate paragraphs in this order.
    intro_paragraphs = (
        "Model comparison is essential in machine learning to ensure that the chosen model provides the best possible performance for a given task. Different models have varying strengths and weaknesses depending on the dataset, complexity, and generalization capability.",
        "The Fake News Detection model was trained to classify news as 'FAKE' or 'REAL'. The dataset was taken from Kaggle.",
        "Dataset by Rajat Kumar: [Kaggle Link](https://www.kaggle.com/datasets/rajatkumar30/fake-news)",
    )
    for paragraph in intro_paragraphs:
        st.write(paragraph)

    # One entry per model: (section heading, first image, second image).
    # Going by the file names, the second image is the classification report.
    model_sections = (
        (
            "Naive Bayes",
            "naiveBayes.png",
            "Naïve Bayes_classification_report.png",
        ),
        (
            "Logistic Regression",
            "logisticRegression.png",
            "Logistic Regression_classification_report.png",
        ),
        (
            "K Means Clustering",
            "KMeans.png",
            "K-Means Clustering_classification_report.png",
        ),
        (
            "Hierarchical Clustering",
            "hierarchical.png",
            "Hierarchical Clustering_classification_report.png",
        ),
    )
    for heading, figure_path, report_path in model_sections:
        st.header(heading)
        st.image(figure_path)
        st.image(report_path)

    # Closing summary of the comparison.
    st.header("Results Comparison")
    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the four unsupervised and supervised learning classification algorithms chosen, Logistic Regression performs the best using this dataset")