|
|
import gradio as gr |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.metrics import accuracy_score |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
# Load the labelled messages. The CSV is decoded as latin-1 and is expected
# to hold the class in column "v1" and the message body in column "v2".
file_path = "spam.csv"
data = pd.read_csv(file_path, encoding='latin-1')

# Keep only the two columns used downstream, under descriptive names.
data = data.rename(columns={"v1": "label", "v2": "text"}).loc[:, ["label", "text"]]

# Encode the class as an integer target: ham -> 0, spam -> 1.
data["label"] = data["label"].map({"ham": 0, "spam": 1})

# Series.map leaves NaN for any label that is not a key of the mapping, and a
# missing message body is NaN as well. Either would make the vectoriser or the
# stratified split fail later with an unrelated-looking error, so drop those
# rows here and restore the integer dtype that the NaNs would have widened.
data = data.dropna(subset=["label", "text"]).astype({"label": "int64"})
|
|
|
|
|
|
|
|
# Represent each message as a TF-IDF vector, ignoring English stop words and
# capping the vocabulary at 3000 terms.
tfidf = TfidfVectorizer(stop_words='english', max_features=3000)

# fit_transform returns a SciPy sparse matrix. It is kept sparse on purpose:
# train_test_split and LogisticRegression both accept sparse input, and a
# dense (n_messages x 3000) float array would mostly store zeros.
X = tfidf.fit_transform(data["text"])

# Integer class labels (0 = ham, 1 = spam), aligned row-for-row with X.
y = data["label"]
|
|
|
|
|
|
|
|
# Hold out 20% of the rows for evaluation. The split is stratified on y so
# both partitions keep the same class ratio, and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
|
|
|
|
|
|
|
|
# Train a logistic-regression classifier with default hyper-parameters on the
# training partition. fit() returns the estimator itself, so construction and
# training can be written as one expression.
model = LogisticRegression().fit(X_train, y_train)
|
|
|
|
|
|
|
|
# Score the trained model on the held-out partition and report the fraction
# of correctly classified messages as a percentage.
held_out_predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, held_out_predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
|
|
|
|
|
|
|
|
def predict_spam(text):
    """Classify a single message as spam or not spam.

    Args:
        text: The message body to classify. ``None`` is treated as an empty
            message; any other value is converted with ``str()``.

    Returns:
        ``"Spam"`` when the model predicts class 1, otherwise ``"Non-Spam"``.
    """
    # Guard against a missing value: the vectoriser expects string documents,
    # so normalise None to an empty message instead of letting it fail.
    message = "" if text is None else str(text)

    # transform() takes an iterable of documents, hence the one-element list.
    # The resulting sparse row is passed to the model as-is; densifying it
    # first is unnecessary.
    features = tfidf.transform([message])
    prediction = model.predict(features)[0]

    return "Spam" if prediction == 1 else "Non-Spam"
|
|
|
|
|
|
|
|
# Build the web UI: a multi-line text box feeds predict_spam and the returned
# verdict is shown in a label component. live=False means the prediction is
# not recomputed on every keystroke.
message_box = gr.Textbox(lines=5, placeholder="Enter email or message text here...")
verdict_label = gr.Label(label="Prediction")

interface = gr.Interface(
    fn=predict_spam,
    inputs=message_box,
    outputs=verdict_label,
    title="Spam Email Detection",
    description="A web application to detect spam emails using machine learning. Enter the email text to check if it's spam or not.",
    live=False,
)
|
|
|
|
|
|
|
|
# Start the web server only when this file is executed as a script, so that
# importing the module (for reuse or testing) does not block on a running
# server.
if __name__ == "__main__":
    interface.launch()
|
|
|
|
|
|