import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
import matplotlib.pyplot as plt
import gradio as gr

# Sample dataset: tiny hand-labeled corpus (1 = spam, 0 = not spam).
data = {
    "email": [
        "Win a free lottery now",
        "Hi, how are you?",
        "Cheap loans available",
        "Meeting at 10 AM",
        "Congratulations, you won!",
        "Can we reschedule our appointment?",
        "Get rich quick scheme",
        "Lunch at 1 PM?"
    ],
    "label": [1, 0, 1, 0, 1, 0, 1, 0]  # 1 is spam, 0 is not spam
}
df = pd.DataFrame(data)

# Vectorize the text data into token-count features.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["email"])
y = df["label"]

# Split the data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Initialize the model (MultinomialNB supports partial_fit for
# incremental training).
model = MultinomialNB()

# Number of iterations for incremental training.
num_iterations = 10

# BUG FIX: with this tiny dataset the training set is smaller than
# num_iterations, so integer division gave chunk_size == 0. Every chunk
# was then empty and skipped, partial_fit was never called, and the
# first predict() raised NotFittedError. Clamp to at least one sample
# per chunk so training always happens.
chunk_size = max(1, X_train.shape[0] // num_iterations)

# Metrics recorded after each training iteration.
accuracy_list = []
precision_list = []
recall_list = []


def incremental_train():
    """Train the model chunk-by-chunk, recording test metrics after each
    partial_fit, then save a plot of the metric curves to /tmp.

    Side effects: mutates the module-level ``model`` and the three
    metric lists, and writes '/tmp/metrics_plot.png'.
    """
    n_train = X_train.shape[0]
    for i in range(num_iterations):
        start = i * chunk_size
        if start >= n_train:  # ran out of training data — stop early
            break
        end = min(start + chunk_size, n_train)

        # classes= is required on the first partial_fit call so the
        # model knows the full label set up front.
        model.partial_fit(
            X_train[start:end], y_train.iloc[start:end], classes=[0, 1]
        )

        # Evaluate on the held-out test set after this chunk.
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        # zero_division=0: early iterations may predict no positives,
        # which would otherwise emit an UndefinedMetricWarning.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)

        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)
        print(
            f"Iteration {i + 1} - Accuracy: {accuracy}, "
            f"Precision: {precision}, Recall: {recall}"
        )

    # Plot the metrics. Use a fresh figure (and close it afterwards) so
    # repeated "Start Training" clicks don't stack lines on one axes.
    plt.figure()
    plt.plot(range(1, len(accuracy_list) + 1), accuracy_list, label="Accuracy")
    plt.plot(range(1, len(precision_list) + 1), precision_list, label="Precision")
    plt.plot(range(1, len(recall_list) + 1), recall_list, label="Recall")
    plt.xlabel("Iteration")
    plt.ylabel("Score")
    plt.title("Model Performance Over Iterations")
    plt.legend()
    plt.savefig('/tmp/metrics_plot.png')  # Save the plot to display in the UI
    plt.close()


def classify_email(email):
    """Return "Spam" or "Not Spam" for a raw email string.

    Guards against being called before training has run (the model is
    unfitted until partial_fit has been called at least once).
    """
    if not hasattr(model, "classes_"):
        return "Model not trained yet - click 'Start Training' first"
    email_features = vectorizer.transform([email])
    prediction = model.predict(email_features)[0]
    return "Spam" if prediction == 1 else "Not Spam"


# Explanation text shown at the top of the UI.
explanation_text = """
## Spam Email Classifier

### Explanation
This application demonstrates a simple machine learning model for classifying emails as "Spam" or "Not Spam". Here's how it works:

1. **Data Preparation**: We use a small sample dataset of emails labeled as spam or not spam.
2. **Text Vectorization**: The email text is converted into numerical features using `CountVectorizer`.
3. **Model Training**: A `MultinomialNB` (Naive Bayes) model is trained incrementally over 10 iterations. During each iteration, the model is trained on a chunk of the training data, and its performance is evaluated on the test set.
4. **Model Evaluation**: After each iteration, we calculate the accuracy, precision, and recall to monitor the model's performance.
5. **Visualization**: The performance metrics are plotted to visualize how the model improves over iterations.
6. **Prediction**: Once the model training is complete, users can input an email to classify it as "Spam" or "Not Spam".

### Instructions
1. Click the "Start Training" button to train the model over 10 iterations.
2. After training is complete, enter an email text in the textbox below.
3. Click "Submit" to see the classification result.
4. Use the example inputs to quickly test the model.
"""

# Create the Gradio interface.
with gr.Blocks() as iface:
    gr.Markdown(explanation_text)
    with gr.Row():
        start_training_button = gr.Button("Start Training")
    gr.Markdown("### Performance Over Iterations")
    # Hidden until training has produced the plot file.
    performance_image = gr.Image('/tmp/metrics_plot.png', visible=False)
    with gr.Row(visible=False) as prediction_row:
        email_input = gr.Textbox(
            lines=2, placeholder="Enter an email", label="Email Input"
        )
        email_output = gr.Text(label="Prediction")
        classify_button = gr.Button("Submit")

    def run_training():
        """Run training, then reveal the plot and the prediction row."""
        incremental_train()
        return gr.update(visible=True), gr.update(visible=True)

    start_training_button.click(
        run_training, [], [performance_image, prediction_row]
    )
    classify_button.click(
        fn=classify_email, inputs=email_input, outputs=email_output
    )

if __name__ == "__main__":
    iface.launch(share=True)