# spam / app.py
# Kh
# Update app.py
# 703c563 verified
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
# Load and preprocess the dataset
# The CSV carries the class in column "v1" ("ham"/"spam") and the message body
# in column "v2"; every other column is dropped.
file_path = "spam.csv"  # Ensure this is the correct path to your dataset
data = pd.read_csv(file_path, encoding='latin-1')
data = data.rename(columns={"v1": "label", "v2": "text"}).loc[:, ["label", "text"]]
data["label"] = data["label"].map({"ham": 0, "spam": 1})  # ham -> 0, spam -> 1
y = data["label"]

# Train-test split
# Split the raw text *before* vectorizing so the held-out messages never
# influence the vocabulary or the IDF weights. Fitting TF-IDF on the full
# dataset leaks test information and inflates the reported accuracy.
text_train, text_test, y_train, y_test = train_test_split(
    data["text"], y, test_size=0.2, random_state=42, stratify=y
)

# TF-IDF Vectorization
# Fit on the training messages only; the test messages are merely transformed.
# The matrices are kept sparse: LogisticRegression accepts sparse input, and
# densifying an (n_messages x 3000) matrix only costs memory.
tfidf = TfidfVectorizer(stop_words='english', max_features=3000)
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)

# Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Check accuracy on the held-out 20% split
accuracy = accuracy_score(y_test, model.predict(X_test))
print(f"Model Accuracy: {accuracy * 100:.2f}%")
# Prediction function
def predict_spam(text):
    """Classify a message as spam or not with the fitted vectorizer and model.

    Args:
        text: Message text to classify. ``None`` is treated as an empty
            message instead of raising inside the vectorizer; other
            non-string values are converted with ``str``.

    Returns:
        "Spam" when the model predicts class 1, otherwise "Non-Spam".
    """
    # NOTE(review): the UI/API may hand over None for an empty field —
    # normalize so the vectorizer always receives a string.
    message = "" if text is None else str(text)
    # The sparse TF-IDF row can be fed to the model directly; no densifying.
    features = tfidf.transform([message])
    prediction = model.predict(features)[0]
    return "Spam" if prediction == 1 else "Non-Spam"
# Gradio Interface
# Build the input and output components first, then wire them to the
# prediction function in a single Interface.
message_box = gr.Textbox(lines=5, placeholder="Enter email or message text here...")
verdict_label = gr.Label(label="Prediction")

interface = gr.Interface(
    fn=predict_spam,
    inputs=message_box,
    outputs=verdict_label,
    title="Spam Email Detection",
    description="A web application to detect spam emails using machine learning. Enter the email text to check if it's spam or not.",
    live=False,  # passed explicitly, exactly as in the original call
)

# Launch the app
interface.launch()