"""Spam message detector: TF-IDF features + logistic regression behind a Gradio UI.

Running this script loads ``spam.csv``, trains the classifier, prints its
accuracy on a held-out split, and launches a web interface for predictions.
"""

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load and preprocess the dataset
file_path = "spam.csv"  # Ensure this is the correct path to your dataset
data = pd.read_csv(file_path, encoding="latin-1")
# Keep only the two columns the model uses, under descriptive names.
data = data.rename(columns={"v1": "label", "v2": "text"}).loc[:, ["label", "text"]]
data["label"] = data["label"].map({"ham": 0, "spam": 1})
# Labels other than "ham"/"spam" become NaN after the mapping, and NaN text
# cannot be vectorized; drop such rows instead of failing later in training.
data = data.dropna(subset=["label", "text"]).astype({"label": int})

y = data["label"]

# Train-test split on the RAW text, so the vectorizer below can be fitted on
# the training portion only (fitting it on all rows would leak test-set
# vocabulary and IDF statistics into training and inflate the accuracy).
text_train, text_test, y_train, y_test = train_test_split(
    data["text"], y, test_size=0.2, random_state=42, stratify=y
)

# TF-IDF Vectorization. The matrices are kept sparse: LogisticRegression
# accepts sparse input, and densifying would only cost memory.
tfidf = TfidfVectorizer(stop_words="english", max_features=3000)
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)

# Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Check accuracy
accuracy = accuracy_score(y_test, model.predict(X_test))
print(f"Model Accuracy: {accuracy * 100:.2f}%")


# Prediction function
def predict_spam(text: str) -> str:
    """Classify a single message with the trained model.

    Args:
        text: Message body to classify. ``None`` is treated as an empty
            message so a cleared input box does not raise.

    Returns:
        ``"Spam"`` if the model predicts label 1, otherwise ``"Non-Spam"``.
    """
    document = "" if text is None else str(text)
    features = tfidf.transform([document])
    prediction = model.predict(features)[0]
    return "Spam" if prediction == 1 else "Non-Spam"


# Gradio Interface
interface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(lines=5, placeholder="Enter email or message text here..."),
    outputs=gr.Label(label="Prediction"),
    title="Spam Email Detection",
    description=(
        "A web application to detect spam emails using machine learning. "
        "Enter the email text to check if it's spam or not."
    ),
    live=False,
)

# Launch the app
interface.launch()