Spaces:

SuriRaja
/

CFTP2A

Sleeping

App Files Files Community

CFTP2A / app.py

SuriRaja

Update app.py

1a74600 verified over 1 year ago

raw

history blame contribute delete

4.94 kB

	import pandas as pd
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.metrics import accuracy_score, precision_score, recall_score
	import matplotlib.pyplot as plt
	import gradio as gr

	# Toy corpus as (email text, label) pairs; label 1 is spam, 0 is not spam.
	_SAMPLE_EMAILS = [
	    ("Win a free lottery now", 1),
	    ("Hi, how are you?", 0),
	    ("Cheap loans available", 1),
	    ("Meeting at 10 AM", 0),
	    ("Congratulations, you won!", 1),
	    ("Can we reschedule our appointment?", 0),
	    ("Get rich quick scheme", 1),
	    ("Lunch at 1 PM?", 0),
	]

	# Column-oriented view of the corpus, in the same row order as above.
	data = {
	    "email": [text for text, _ in _SAMPLE_EMAILS],
	    "label": [flag for _, flag in _SAMPLE_EMAILS],
	}
	df = pd.DataFrame(data)

	# Bag-of-words features: the vocabulary is learned from the full corpus.
	vectorizer = CountVectorizer()
	X = vectorizer.fit_transform(df["email"])
	y = df["label"]

	# Hold out 30% of the rows for evaluation; the seed keeps the split reproducible.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.3, random_state=42
	)

	# Naive Bayes classifier that is fitted incrementally by incremental_train().
	model = MultinomialNB()

	# Number of incremental training rounds and the rows fed to each round.
	# NOTE(review): integer division floors chunk_size to 0 whenever the training
	# set has fewer rows than num_iterations.
	num_iterations = 10
	chunk_size = X_train.shape[0] // num_iterations

	# Per-iteration metric histories, appended to by incremental_train().
	accuracy_list, precision_list, recall_list = [], [], []

	# Incremental training function
	def incremental_train():
	    """Fit ``model`` chunk by chunk and record its test-set metrics.

	    The training rows are split into at most ``num_iterations`` contiguous,
	    near-equal chunks that together cover every row. After each chunk is
	    passed to ``model.partial_fit`` the model is scored on ``X_test`` /
	    ``y_test``; accuracy, precision and recall are appended to
	    ``accuracy_list``, ``precision_list`` and ``recall_list`` and printed.
	    Finally the three curves are plotted and saved to
	    ``/tmp/metrics_plot.png``.

	    The metric lists are cleared on entry, so each call describes one run.
	    ``model`` itself is not reset: calling this again keeps updating the
	    same estimator.

	    Returns:
	        None. Results are exposed through the module-level lists and the
	        saved image.
	    """
	    n_samples = X_train.shape[0]

	    # Start each run with empty histories so repeated button clicks do not
	    # stack several runs onto one curve.
	    accuracy_list.clear()
	    precision_list.clear()
	    recall_list.clear()

	    # Chunk boundaries are computed here rather than from the module-level
	    # chunk_size: that value is n_samples // num_iterations, which is 0 when
	    # there are fewer rows than iterations (every chunk would be skipped and
	    # the model never fitted) and otherwise drops any remainder rows.
	    n_chunks = max(1, min(num_iterations, n_samples))
	    bounds = [k * n_samples // n_chunks for k in range(n_chunks + 1)]

	    for iteration, (start, end) in enumerate(zip(bounds, bounds[1:]), start=1):
	        if start == end:  # Skip if no data in the chunk
	            continue

	        # .iloc makes the label slice explicitly positional so it lines up
	        # with the row slice of the feature matrix.
	        model.partial_fit(
	            X_train[start:end], y_train.iloc[start:end], classes=[0, 1]
	        )

	        # Predict on the test set
	        y_pred = model.predict(X_test)

	        # Calculate metrics; zero_division=0 keeps the score at 0 without a
	        # warning when a class is absent from the predictions or the labels.
	        accuracy = accuracy_score(y_test, y_pred)
	        precision = precision_score(y_test, y_pred, zero_division=0)
	        recall = recall_score(y_test, y_pred, zero_division=0)

	        accuracy_list.append(accuracy)
	        precision_list.append(precision)
	        recall_list.append(recall)

	        print(f"Iteration {iteration} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")

	    # Plot the metrics on a fresh figure so earlier runs are not drawn again,
	    # and close it afterwards so figures do not pile up in memory.
	    fig, ax = plt.subplots()
	    rounds = range(1, len(accuracy_list) + 1)
	    ax.plot(rounds, accuracy_list, label="Accuracy")
	    ax.plot(rounds, precision_list, label="Precision")
	    ax.plot(rounds, recall_list, label="Recall")
	    ax.set_xlabel("Iteration")
	    ax.set_ylabel("Score")
	    ax.set_title("Model Performance Over Iterations")
	    ax.legend()
	    fig.savefig('/tmp/metrics_plot.png')  # Save the plot to display in the UI
	    plt.close(fig)

	# Prediction callback wired to the "Submit" button
	def classify_email(email):
	    """Classify one email text with the module-level vectorizer and model.

	    Args:
	        email: Raw email text to classify.

	    Returns:
	        "Spam" when the model predicts label 1, otherwise "Not Spam".
	    """
	    features = vectorizer.transform([email])
	    predicted_label = model.predict(features)[0]
	    if predicted_label == 1:
	        return "Spam"
	    return "Not Spam"

	# Markdown shown at the top of the UI (rendered via gr.Markdown below): it
	# describes the training/evaluation workflow and how to use the app.
	explanation_text = """
	## Spam Email Classifier

	### Explanation
	This application demonstrates a simple machine learning model for classifying emails as "Spam" or "Not Spam". Here's how it works:

	1. Data Preparation: We use a small sample dataset of emails labeled as spam or not spam.
	2. Text Vectorization: The email text is converted into numerical features using `CountVectorizer`.
	3. Model Training: A `MultinomialNB` (Naive Bayes) model is trained incrementally over 10 iterations. During each iteration, the model is trained on a chunk of the training data, and its performance is evaluated on the test set.
	4. Model Evaluation: After each iteration, we calculate the accuracy, precision, and recall to monitor the model's performance.
	5. Visualization: The performance metrics are plotted to visualize how the model improves over iterations.
	6. Prediction: Once the model training is complete, users can input an email to classify it as "Spam" or "Not Spam".

	### Instructions
	1. Click the "Start Training" button to train the model over 10 iterations.
	2. After training is complete, enter an email text in the textbox below.
	3. Click "Submit" to see the classification result.
	4. Use the example inputs to quickly test the model.
	"""

	# Create the Gradio interface: a training button, a (initially hidden)
	# metrics plot, and a (initially hidden) prediction row that is revealed
	# once training has run.
	with gr.Blocks() as iface:
	    gr.Markdown(explanation_text)

	    with gr.Row():
	        start_training_button = gr.Button("Start Training")

	    gr.Markdown("### Performance Over Iterations")
	    # No initial value: the plot file is only written by incremental_train(),
	    # so it does not exist yet when the UI is built.
	    performance_image = gr.Image(visible=False)

	    with gr.Row(visible=False) as prediction_row:
	        email_input = gr.Textbox(lines=2, placeholder="Enter an email", label="Email Input")
	        email_output = gr.Text(label="Prediction")
	        classify_button = gr.Button("Submit")

	    def run_training():
	        """Train the model, then reveal the fresh plot and the prediction row."""
	        incremental_train()
	        # Send the newly saved plot as the image value; toggling visibility
	        # alone would not load the file written during this run.
	        return (
	            gr.update(value='/tmp/metrics_plot.png', visible=True),
	            gr.update(visible=True),
	        )

	    start_training_button.click(run_training, [], [performance_image, prediction_row])
	    classify_button.click(fn=classify_email, inputs=email_input, outputs=email_output)

	iface.launch(share=True)