CFTP2A / app.py
SuriRaja's picture
Update app.py
1a74600 verified
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import gradio as gr
# Toy corpus: eight short emails, alternating spam (1) and not-spam (0).
data = {
    "email": [
        "Win a free lottery now",
        "Hi, how are you?",
        "Cheap loans available",
        "Meeting at 10 AM",
        "Congratulations, you won!",
        "Can we reschedule our appointment?",
        "Get rich quick scheme",
        "Lunch at 1 PM?",
    ],
    "label": [1, 0, 1, 0, 1, 0, 1, 0],  # 1 is spam, 0 is not spam
}
df = pd.DataFrame(data)

# Classifier that is trained incrementally, plus the per-iteration
# metric histories that the training routine fills in.
model = MultinomialNB()
accuracy_list, precision_list, recall_list = [], [], []

# Bag-of-words features for every email and the matching label column.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["email"])
y = df["label"]

# Hold out 30% of the rows for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Incremental-training schedule. Floor division: this is 0 whenever there
# are fewer training rows than iterations.
num_iterations = 10
chunk_size = X_train.shape[0] // num_iterations
# Incremental training function
def incremental_train():
    """Train the global ``model`` chunk by chunk and plot test-set metrics.

    The training rows are walked in consecutive chunks, at most
    ``num_iterations`` of them. After each ``partial_fit`` the model is
    scored on the held-out test set; accuracy, precision and recall are
    stored in the module-level metric lists, printed, and finally plotted
    to ``/tmp/metrics_plot.png``.

    Returns:
        None. Results are exposed through ``accuracy_list``,
        ``precision_list``, ``recall_list`` and the saved image file.
    """
    n_samples = X_train.shape[0]
    # The module-level ``chunk_size`` is a floor division and is 0 when there
    # are fewer training rows than iterations, which made every chunk empty
    # and left the model unfitted. Ceiling division (written as -(-a // b))
    # guarantees non-empty chunks that together cover every training row.
    step = max(1, -(-n_samples // max(1, num_iterations)))

    # Start from empty histories so a repeated call does not append to the
    # metrics of an earlier run.
    # NOTE: the model itself is not reset; calling this again keeps training
    # the same estimator on the same rows.
    accuracy_list.clear()
    precision_list.clear()
    recall_list.clear()

    for i in range(num_iterations):
        start = i * step
        if start >= n_samples:
            # All training rows have been consumed; nothing left to fit.
            break
        end = min(start + step, n_samples)

        # ``classes`` must be supplied because a single chunk may contain
        # only one of the two labels. ``.iloc`` makes the positional slice
        # of the label Series explicit.
        model.partial_fit(
            X_train[start:end],
            y_train.iloc[start:end],
            classes=[0, 1],
        )

        # Evaluate on the held-out test set after this chunk.
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        # zero_division=0 keeps the score at 0 (without a warning) when the
        # model predicts no positives or the test set contains none.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)

        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)
        print(f"Iteration {i + 1} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")

    # Draw on a fresh figure and close it afterwards so repeated calls do not
    # pile additional lines onto pyplot's implicit current figure.
    fig, ax = plt.subplots()
    rounds = range(1, len(accuracy_list) + 1)
    ax.plot(rounds, accuracy_list, label="Accuracy")
    ax.plot(rounds, precision_list, label="Precision")
    ax.plot(rounds, recall_list, label="Recall")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Score")
    ax.set_title("Model Performance Over Iterations")
    ax.legend()
    fig.savefig('/tmp/metrics_plot.png')  # Save the plot to display in the UI
    plt.close(fig)
# Define the prediction function for Gradio
def classify_email(email):
    """Classify a single email text with the global vectorizer and model.

    Args:
        email: Raw email text to classify.

    Returns:
        "Spam" when the model predicts label 1, otherwise "Not Spam".
    """
    features = vectorizer.transform([email])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam"
# Markdown shown at the top of the Gradio page: a description of the
# pipeline followed by usage instructions.
# NOTE(review): instruction 4 mentions "example inputs", but the interface
# built below does not define any examples - confirm whether they were
# intended.
explanation_text = """
## Spam Email Classifier
### Explanation
This application demonstrates a simple machine learning model for classifying emails as "Spam" or "Not Spam". Here's how it works:
1. **Data Preparation**: We use a small sample dataset of emails labeled as spam or not spam.
2. **Text Vectorization**: The email text is converted into numerical features using `CountVectorizer`.
3. **Model Training**: A `MultinomialNB` (Naive Bayes) model is trained incrementally over 10 iterations. During each iteration, the model is trained on a chunk of the training data, and its performance is evaluated on the test set.
4. **Model Evaluation**: After each iteration, we calculate the accuracy, precision, and recall to monitor the model's performance.
5. **Visualization**: The performance metrics are plotted to visualize how the model improves over iterations.
6. **Prediction**: Once the model training is complete, users can input an email to classify it as "Spam" or "Not Spam".
### Instructions
1. Click the "Start Training" button to train the model over 10 iterations.
2. After training is complete, enter an email text in the textbox below.
3. Click "Submit" to see the classification result.
4. Use the example inputs to quickly test the model.
"""
# Build the Gradio interface: a training button, the metrics plot, and a
# prediction row that stays hidden until training has finished.
with gr.Blocks() as iface:
    gr.Markdown(explanation_text)
    with gr.Row():
        start_training_button = gr.Button("Start Training")
    gr.Markdown("### Performance Over Iterations")
    # Created without an initial value: the plot file does not exist until
    # training has run, so the path is supplied by the click handler instead.
    performance_image = gr.Image(visible=False)
    with gr.Row(visible=False) as prediction_row:
        email_input = gr.Textbox(lines=2, placeholder="Enter an email", label="Email Input")
        email_output = gr.Text(label="Prediction")
        classify_button = gr.Button("Submit")

    def run_training():
        """Run training, then reveal the plot and the prediction widgets.

        Returns:
            A pair of Gradio updates: one that loads the freshly saved
            metrics plot into the image and shows it, and one that makes
            the prediction row visible.
        """
        incremental_train()
        # Pass the saved file as the new value so the image shows the plot
        # written by this training run, not just a visibility change.
        return (
            gr.update(value='/tmp/metrics_plot.png', visible=True),
            gr.update(visible=True),
        )

    start_training_button.click(run_training, [], [performance_image, prediction_row])
    classify_button.click(fn=classify_email, inputs=email_input, outputs=email_output)

iface.launch(share=True)