import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd

  # Load and preprocess the dataset
file_path = "spam.csv"  # Ensure this is the correct path to your dataset

# Read the CSV as latin-1, keep only the "v1"/"v2" columns (renamed to
# "label"/"text"), and encode the labels numerically: ham -> 0, spam -> 1.
data = pd.read_csv(file_path, encoding='latin-1')
data = data.rename(columns={"v1": "label", "v2": "text"}).loc[:, ["label", "text"]]
data["label"] = data["label"].map({"ham": 0, "spam": 1})
y = data["label"]

# Train-test split on the RAW text, before any vectorization.  Fitting TF-IDF
# on the full corpus would let the test messages influence the vocabulary and
# IDF weights (data leakage), making the reported accuracy optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    data["text"], y, test_size=0.2, random_state=42, stratify=y
)

# TF-IDF Vectorization: learn vocabulary/IDF from the training messages only,
# then apply the same fitted transform to the held-out messages.  The matrices
# are left sparse; LogisticRegression accepts sparse input, so a dense copy
# via .toarray() would only cost memory.
tfidf = TfidfVectorizer(stop_words='english', max_features=3000)
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)

# Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Check accuracy on the held-out split
accuracy = accuracy_score(y_test, model.predict(X_test))
print(f"Model Accuracy: {accuracy * 100:.2f}%")

  # Prediction function
def predict_spam(text: str) -> str:
    """Classify a single message as spam or not.

    The message is vectorized with the module-level fitted ``tfidf`` and
    scored by the module-level trained ``model``.

    Args:
        text: The raw email/message text. ``None`` is treated as an empty
            message instead of raising inside the vectorizer.

    Returns:
        ``"Spam"`` when the model predicts label 1, otherwise ``"Non-Spam"``.
    """
    # Guard against a missing value so the UI never surfaces a traceback.
    message = "" if text is None else text
    # The vectorizer returns a sparse row, which the model can score directly;
    # no dense conversion is needed.
    features = tfidf.transform([message])
    prediction = model.predict(features)[0]
    return "Spam" if prediction == 1 else "Non-Spam"

  # Gradio Interface
# Single-textbox UI: the entered text is passed to predict_spam and the
# returned string is shown in a label.  live=False means the prediction runs
# on submit rather than on every keystroke.
interface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=5, placeholder="Enter email or message text here..."),
    outputs=gr.Label(label="Prediction"),
    title="Spam Email Detection",
    description="A web application to detect spam emails using machine learning. Enter the email text to check if it's spam or not.",
    live=False,
)

# Launch the app only when this file is executed as a script, so importing
# the module (e.g. to reuse predict_spam) does not start a web server.
if __name__ == "__main__":
    interface.launch()