|
|
import streamlit as st
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
import sklearn
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
|
from sklearn.naive_bayes import MultinomialNB
|
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
from sklearn.svm import SVC
|
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
|
|
|
|
|
# Load the labelled corpus from the working directory; the file is read for
# its "Message" and "Category" columns below.
df = pd.read_csv("spam.csv")

# Page heading (rendered with Streamlit's green colour directive).
st.title(":green[Spam and Ham Detection]")

# x holds the raw message text, y the label assigned to each message.
x, y = df["Message"], df["Category"]
|
|
|
|
|
|
# Bag-of-words encoder; English stop words are dropped before counting.
bow = CountVectorizer(stop_words="english")

# Learn the vocabulary from every message and expand the term counts into a
# dense table with one column per vocabulary term.
_term_counts = bow.fit_transform(x)
final_data = pd.DataFrame(
    _term_counts.toarray(),
    columns=bow.get_feature_names_out(),
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    final_data,
    y,
    test_size=0.2,
    random_state=20,
)
|
|
|
|
|
|
|
|
|
# Display label -> estimator class, in the order the options are offered.
_MODEL_CLASSES = (
    ("Naive Bayes", MultinomialNB),
    ("KNN", KNeighborsClassifier),
    ("Decision Tree", DecisionTreeClassifier),
    ("Logistic Regression", LogisticRegression),
    ("SVM", SVC),
)

# One freshly constructed estimator (default hyper-parameters) per label.
models = {label: estimator_cls() for label, estimator_cls in _MODEL_CLASSES}
|
|
|
|
|
|
|
|
|
# Let the user pick one of the registered algorithms by its label.
model_choice = st.radio(
    "Choose a Classification Algorithm",
    options=list(models),
)

# Train the chosen estimator on the training split, then score it on the
# held-out rows. `fit` returns the estimator itself, so the calls chain.
obj = models[model_choice]
y_pred = obj.fit(x_train, y_train).predict(x_test)

# Accuracy expressed on a 0-100 scale.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred) * 100
|
|
|
|
|
|
|
|
|
# Reveal the held-out accuracy of the selected model only on request.
_show_accuracy_clicked = st.button("Show Accuracy")
if _show_accuracy_clicked:
    st.write(f"**Accuracy of {model_choice}:** {accuracy:.4f}")

# Free-text box for the message the user wants classified.
email_input = st.text_input(label="enter email")
|
|
|
|
|
|
|
|
|
def predict_email(email):
    """Classify one message with the selected model and display the result.

    The text is encoded with the module-level vectorizer ``bow`` and passed
    to the module-level fitted estimator ``obj``; the predicted label is
    written to the Streamlit page.

    Args:
        email: Raw message text to classify.

    Returns:
        None. The prediction is shown via ``st.write``.
    """
    # The estimator was fitted on a DataFrame whose columns are the
    # vocabulary terms. Rebuild that same labelled layout for the single
    # row: handing over a bare ndarray makes scikit-learn warn that the
    # input lacks the feature names seen during fit.
    data = pd.DataFrame(
        bow.transform([email]).toarray(),
        columns=bow.get_feature_names_out(),
    )
    prediction = obj.predict(data)[0]
    st.write(f"**Prediction:** {prediction}")
|
|
|
|
|
|
|
|
|
# Classify the typed message when the user asks for a prediction.
if st.button("Predict Email"):
    # Strip first so that whitespace-only input is treated as empty instead
    # of being sent to the classifier as if it were a real message.
    _message = email_input.strip()
    if _message:
        predict_email(_message)
    else:
        # ":black[...]" is not one of Streamlit's supported colour
        # directives, so the markup would be shown literally; use the
        # built-in warning box for the prompt instead.
        st.warning("enter mail")
|
|
|
|
|
|
|