| import pandas as pd |
| from sklearn.feature_extraction.text import CountVectorizer |
| from sklearn.model_selection import train_test_split |
| from sklearn.naive_bayes import MultinomialNB |
| from sklearn.metrics import accuracy_score, precision_score, recall_score |
| import matplotlib.pyplot as plt |
| import gradio as gr |
|
|
| |
# Toy corpus used for both training and evaluation. Each entry pairs an email
# text with its label: 1 marks spam, 0 marks a legitimate message.
_LABELLED_EMAILS = [
    ("Win a free lottery now", 1),
    ("Hi, how are you?", 0),
    ("Cheap loans available", 1),
    ("Meeting at 10 AM", 0),
    ("Congratulations, you won!", 1),
    ("Can we reschedule our appointment?", 0),
    ("Get rich quick scheme", 1),
    ("Lunch at 1 PM?", 0),
]

# Column-oriented view of the corpus, in the shape pandas expects.
data = {
    "email": [text for text, _ in _LABELLED_EMAILS],
    "label": [flag for _, flag in _LABELLED_EMAILS],
}

# Two-column frame: "email" (raw text) and "label" (0/1).
df = pd.DataFrame(data)
|
|
| |
# Turn every email into a bag-of-words count vector.
# NOTE(review): the vocabulary is learned from all emails, including the ones
# that end up in the test split below.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["email"])
y = df["label"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Naive Bayes classifier, trained incrementally through partial_fit.
model = MultinomialNB()

# Upper bound on the number of incremental training steps.
num_iterations = 10
# Rows fed to the model per step. Plain floor division yields 0 whenever there
# are fewer training rows than iterations (the case for the 8-email sample),
# which would make every chunk empty and leave the model untrained, so the
# size is clamped to at least one row.
chunk_size = max(1, X_train.shape[0] // num_iterations)

# Per-step evaluation history, appended to during training and used for the
# performance plot.
accuracy_list = []
precision_list = []
recall_list = []
|
|
| |
def incremental_train():
    """Train the module-level model in chunks and plot its test metrics.

    The training rows are divided into at most ``num_iterations`` contiguous
    chunks. After each non-empty chunk is passed to ``model.partial_fit`` the
    model is scored on the held-out test set; accuracy, precision and recall
    are appended to ``accuracy_list``, ``precision_list`` and ``recall_list``
    and printed. Finally the three curves are saved to
    ``/tmp/metrics_plot.png``.

    The metric lists are cleared on entry, so they always describe the most
    recent call. The model itself is not reset: calling this function again
    continues training the same estimator.
    """
    # Start from empty histories so a second run does not append to (and
    # re-plot) the results of the previous one.
    accuracy_list.clear()
    precision_list.clear()
    recall_list.clear()

    n_samples = X_train.shape[0]

    for i in range(num_iterations):
        # Proportional boundaries cover every training row exactly once, even
        # when n_samples is smaller than, or not a multiple of,
        # num_iterations. A fixed floor-sized chunk would be 0 rows wide in
        # the former case and would drop the remainder in the latter.
        start = i * n_samples // num_iterations
        end = (i + 1) * n_samples // num_iterations

        if start == end:
            # Fewer rows than iterations: this step has nothing to train on.
            continue

        # .iloc makes the label slice explicitly positional, matching the
        # row-position slice taken from the feature matrix.
        model.partial_fit(
            X_train[start:end],
            y_train.iloc[start:end],
            classes=[0, 1],
        )

        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        # zero_division=0 reports 0 without a warning when the score is
        # undefined (no predicted or no actual positives), which is likely
        # early on with so little data.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)

        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)

        print(f"Iteration {i + 1} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")

    # Draw on a dedicated figure and close it afterwards; plotting on the
    # implicit global figure would stack curves and legend entries from
    # earlier calls and keep the figure alive.
    steps = range(1, len(accuracy_list) + 1)
    fig, ax = plt.subplots()
    # Markers keep a curve visible even if only one step was recorded.
    ax.plot(steps, accuracy_list, marker="o", label="Accuracy")
    ax.plot(steps, precision_list, marker="o", label="Precision")
    ax.plot(steps, recall_list, marker="o", label="Recall")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Score")
    ax.set_title("Model Performance Over Iterations")
    ax.legend()
    fig.savefig('/tmp/metrics_plot.png')
    plt.close(fig)
|
|
| |
def classify_email(email):
    """Classify one email text with the module-level vectorizer and model.

    Args:
        email: Raw email text to classify.

    Returns:
        "Spam" when the model predicts label 1, otherwise "Not Spam".
    """
    # The vectorizer expects a collection of documents, so wrap the single
    # text in a list; the model then returns exactly one prediction.
    features = vectorizer.transform([email])
    (predicted_label,) = model.predict(features)

    if predicted_label == 1:
        return "Spam"
    return "Not Spam"
|
|
| |
# Markdown shown at the top of the page: what the demo does and how to use it.
explanation_text = """
## Spam Email Classifier

### Explanation
This application demonstrates a simple machine learning model for classifying emails as "Spam" or "Not Spam". Here's how it works:

1. **Data Preparation**: We use a small sample dataset of emails labeled as spam or not spam.
2. **Text Vectorization**: The email text is converted into numerical features using `CountVectorizer`.
3. **Model Training**: A `MultinomialNB` (Naive Bayes) model is trained incrementally over 10 iterations. During each iteration, the model is trained on a chunk of the training data, and its performance is evaluated on the test set.
4. **Model Evaluation**: After each iteration, we calculate the accuracy, precision, and recall to monitor the model's performance.
5. **Visualization**: The performance metrics are plotted to visualize how the model improves over iterations.
6. **Prediction**: Once the model training is complete, users can input an email to classify it as "Spam" or "Not Spam".

### Instructions
1. Click the "Start Training" button to train the model over 10 iterations.
2. After training is complete, enter an email text in the textbox below.
3. Click "Submit" to see the classification result.
4. Use the example inputs to quickly test the model.
"""


# Page layout: explanation, a training button, the (initially hidden) metrics
# plot, and the (initially hidden) prediction widgets.
with gr.Blocks() as iface:
    gr.Markdown(explanation_text)

    with gr.Row():
        start_training_button = gr.Button("Start Training")

    gr.Markdown("### Performance Over Iterations")
    # Created without an initial value: the plot file is only written by
    # training, so it does not exist yet when the page is built. The path is
    # supplied by run_training once the file is on disk.
    performance_image = gr.Image(visible=False)

    # Revealed only after training, so the user cannot query an unfitted model.
    with gr.Row(visible=False) as prediction_row:
        email_input = gr.Textbox(lines=2, placeholder="Enter an email", label="Email Input")
        email_output = gr.Text(label="Prediction")
        classify_button = gr.Button("Submit")

    def run_training():
        """Train the model, then reveal the fresh plot and the prediction row.

        Returns:
            Two component updates, in the order of the click handler's
            outputs: one for the image (new value and visibility) and one for
            the prediction row (visibility).
        """
        incremental_train()
        # Send the path as the image value so the component loads the plot
        # that was just saved; toggling visibility alone would not refresh it.
        return (
            gr.update(value='/tmp/metrics_plot.png', visible=True),
            gr.update(visible=True),
        )

    start_training_button.click(run_training, [], [performance_image, prediction_row])
    classify_button.click(fn=classify_email, inputs=email_input, outputs=email_output)


# share=True asks Gradio for a public share link in addition to the local server.
iface.launch(share=True)
|
|