Spaces:

Cyanex099
/

spam

Sleeping

spam / app.py

Update app.py

703c563 verified about 1 year ago

1.68 kB

	import gradio as gr
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score
	import pandas as pd

	# Load and preprocess the dataset.
	# The CSV is decoded as latin-1; its `v1`/`v2` columns are renamed to
	# `label`/`text`, every other column is dropped, and the string labels are
	# mapped to integers (ham -> 0, spam -> 1).
	file_path = "spam.csv"  # Ensure this is the correct path to your dataset
	data = pd.read_csv(file_path, encoding='latin-1')
	data = data.rename(columns={"v1": "label", "v2": "text"}).loc[:, ["label", "text"]]
	data["label"] = data["label"].map({"ham": 0, "spam": 1})

	# TF-IDF vectorization (English stop words removed, at most 3000 terms).
	# The feature matrix is deliberately kept sparse: train_test_split and
	# LogisticRegression both accept SciPy sparse input, so densifying it with
	# .toarray() would only cost memory (rows x up-to-3000 float64 values).
	tfidf = TfidfVectorizer(stop_words='english', max_features=3000)
	X = tfidf.fit_transform(data["text"])
	y = data["label"]

	# Stratified 80/20 train-test split with a fixed seed so runs are repeatable.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.2, random_state=42, stratify=y
	)

	# Train a Logistic Regression model on the training portion.
	model = LogisticRegression()
	model.fit(X_train, y_train)

	# Report accuracy on the held-out test portion.
	accuracy = accuracy_score(y_test, model.predict(X_test))
	print(f"Model Accuracy: {accuracy * 100:.2f}%")

	# Prediction function
	def predict_spam(text):
	    """Classify a message as spam or not using the trained model.

	    Args:
	        text: The email or message text to classify. ``None`` is treated
	            as an empty message.

	    Returns:
	        ``"Spam"`` if the model predicts class 1, otherwise ``"Non-Spam"``.
	    """
	    # The vectorizer needs a string; fall back to an empty message in case
	    # the caller delivers no value at all.
	    message = "" if text is None else text
	    # The model can score the vectorizer's sparse output directly, so the
	    # single-row matrix does not need to be densified first.
	    features = tfidf.transform([message])
	    prediction = model.predict(features)[0]
	    return "Spam" if prediction == 1 else "Non-Spam"

	# Gradio UI: one multi-line text input feeding predict_spam, one label output.
	message_box = gr.Textbox(lines=5, placeholder="Enter email or message text here...")
	prediction_label = gr.Label(label="Prediction")

	interface = gr.Interface(
	    fn=predict_spam,
	    inputs=message_box,
	    outputs=prediction_label,
	    title="Spam Email Detection",
	    description="A web application to detect spam emails using machine learning. Enter the email text to check if it's spam or not.",
	    live=False,
	)

	# Launch the app.
	interface.launch()