Spaces:
Sleeping
Sleeping
Surya8663
committed on
Commit
·
4c21dbb
1
Parent(s):
5c04706
Final version for free tier (removes gmail fetch)
Browse files- backend/app.py +62 -49
backend/app.py
CHANGED
|
@@ -1,66 +1,75 @@
|
|
| 1 |
-
# backend/app.py (
|
| 2 |
|
| 3 |
from transformers import pipeline
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
# --- This is the core change ---
|
| 11 |
-
# 1. We load a pre-trained "zero-shot-classification" pipeline from Hugging Face.
|
| 12 |
-
# The first time this code runs, it will automatically download the model (approx. 1.6GB).
|
| 13 |
print("Loading zero-shot classification model...")
|
| 14 |
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 15 |
print("Model loaded successfully.")
|
| 16 |
-
# -----------------------------
|
| 17 |
|
| 18 |
database.init_db()
|
| 19 |
|
| 20 |
-
# --- These are the labels we want to sort emails into ---
|
| 21 |
-
# You can change or add to this list!
|
| 22 |
POSSIBLE_LABELS = ["Work", "Promotions", "Personal", "Spam", "Important"]
|
| 23 |
-
email_storage = {}
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
sequences = (df['Subject'] + " " + df['Body']).tolist()
|
| 32 |
|
| 33 |
-
print(f"Classifying {len(sequences)} emails...")
|
| 34 |
-
# Use the zero-shot pipeline. It's powerful but can be slow on CPU for many emails.
|
| 35 |
predictions = classifier(sequences, candidate_labels=POSSIBLE_LABELS)
|
| 36 |
print("Classification complete.")
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
# Add results to the DataFrame
|
| 43 |
df['predicted_folder'] = pred_labels
|
| 44 |
df['confidence'] = pred_scores
|
| 45 |
|
| 46 |
-
# Save to the database
|
| 47 |
database.add_email_records(df)
|
| 48 |
-
print(f"Saved {len(df)} records to the database.")
|
| 49 |
|
| 50 |
# Format for display
|
| 51 |
df_display = df.copy()
|
| 52 |
df_display['confidence'] = [f"{score:.2%}" for score in pred_scores]
|
| 53 |
-
return df_display
|
| 54 |
-
|
| 55 |
-
def fetch_and_classify_emails():
|
| 56 |
-
"""Fetches emails from Gmail and triggers the classification."""
|
| 57 |
-
emails = fetch_latest_emails()
|
| 58 |
-
if not emails:
|
| 59 |
-
email_storage.clear()
|
| 60 |
-
return pd.DataFrame({"From": ["No new emails found or an error occurred."], "Subject": [""]})
|
| 61 |
-
|
| 62 |
-
df = pd.DataFrame(emails)
|
| 63 |
-
return predict_and_save_emails(df)
|
| 64 |
|
| 65 |
# --- History & Feedback Functions (Unchanged) ---
|
| 66 |
def show_history():
|
|
@@ -73,20 +82,28 @@ def show_history():
|
|
| 73 |
def save_feedback_and_refresh(record_id, corrected_label):
|
| 74 |
if not record_id or not corrected_label:
|
| 75 |
return show_history(), "Please enter a Record ID and a correct label first."
|
| 76 |
-
|
| 77 |
record_id_int = int(record_id)
|
| 78 |
database.update_feedback(record_id_int, corrected_label)
|
| 79 |
-
|
| 80 |
return show_history(), f"Success! Record {record_id_int} updated."
|
| 81 |
|
| 82 |
-
# --- Build the Gradio Interface
|
| 83 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
| 84 |
gr.Markdown("## 📧 Smart Email Sorter (Zero-Shot Model)")
|
| 85 |
|
| 86 |
-
with gr.Tab("
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
with gr.Tab("History & Feedback"):
|
| 92 |
gr.Markdown("### Prediction History & Feedback")
|
|
@@ -101,8 +118,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
| 101 |
history_df = gr.Dataframe(label="All Saved Predictions", wrap=True)
|
| 102 |
app.load(fn=show_history, outputs=history_df)
|
| 103 |
refresh_history_btn.click(fn=show_history, outputs=history_df)
|
| 104 |
-
submit_feedback_btn.click(
|
| 105 |
-
fn=save_feedback_and_refresh,
|
| 106 |
-
inputs=[record_id_input, feedback_label],
|
| 107 |
-
outputs=[history_df, feedback_status]
|
| 108 |
-
)
|
|
|
|
| 1 |
+
# backend/app.py (FINAL VERSION FOR FREE TIER)
|
| 2 |
|
| 3 |
from transformers import pipeline
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
+
# We no longer need gmail_fetcher, so we remove the import
|
| 7 |
+
import database
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
# --- Diagnostic and Model Loading (Unchanged) ---
print("--- Starting Application ---")
try:
    # Report the installed Gradio version for Space-startup debugging.
    gradio_version = gr.__version__
    print(f"Gradio Version: {gradio_version}")
except Exception as e:
    print(f"Could not get Gradio version: {e}")
print("--------------------------")

# Downloads ~1.6GB on first run; subsequent runs use the HF cache.
print("Loading zero-shot classification model...")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
print("Model loaded successfully.")

# Ensure the SQLite tables exist before any handler touches them.
database.init_db()

# Candidate folders handed to the zero-shot classifier.
POSSIBLE_LABELS = ["Work", "Promotions", "Personal", "Spam", "Important"]
|
|
|
|
| 25 |
|
| 26 |
+
# --- NEW/RESTORED: Functions for Single and Batch Prediction ---
|
| 27 |
+
|
| 28 |
+
def predict_single(subject, body):
    """Classifies a single email from subject and body text."""
    # Bail out when both fields are blank (whitespace-only counts as blank).
    if not (subject.strip() or body.strip()):
        return "Please provide a subject or body."

    prediction = classifier(subject + " " + body, candidate_labels=POSSIBLE_LABELS)

    # Pipeline output is sorted best-first; grab the top label/score pair.
    top_label, top_score = prediction['labels'][0], prediction['scores'][0]
    return f"Predicted Folder: {top_label} (Confidence: {top_score:.2%})"
|
| 40 |
+
|
| 41 |
+
def predict_batch_csv(file):
    """Classifies a batch of emails from an uploaded CSV file.

    The CSV must contain 'subject' and 'body' columns. Top-1 predictions are
    appended as 'predicted_folder'/'confidence' columns, persisted via the
    database module, and a display-formatted copy of the DataFrame is
    returned for the Gradio table. Errors are reported as a one-row
    DataFrame with an 'Error' column rather than raised.
    """
    # Guard: the "Classify Batch" button can be clicked with no file uploaded,
    # in which case Gradio passes None and file.name would raise AttributeError.
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a CSV file first."]})

    try:
        # Gradio versions differ: gr.File may yield a tempfile-like object
        # (with .name) or a plain path string — accept both.
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return pd.DataFrame({"Error": [f"Failed to read CSV: {e}"]})

    if 'subject' not in df.columns or 'body' not in df.columns:
        return pd.DataFrame({"Error": ["CSV must have 'subject' and 'body' columns."]})

    # Guard: an empty batch would be handed to the classifier pointlessly.
    if df.empty:
        return pd.DataFrame({"Error": ["CSV contains no rows to classify."]})

    # Normalize missing cells so the string concatenation below can't hit NaN.
    df['Subject'] = df['subject'].fillna('')
    df['Body'] = df['body'].fillna('')
    sequences = (df['Subject'] + " " + df['Body']).tolist()

    print(f"Classifying {len(sequences)} emails from CSV...")
    predictions = classifier(sequences, candidate_labels=POSSIBLE_LABELS)
    print("Classification complete.")

    # Top-1 label/score per email (pipeline output is sorted best-first).
    pred_labels = [p['labels'][0] for p in predictions]
    pred_scores = [p['scores'][0] for p in predictions]

    df['predicted_folder'] = pred_labels
    df['confidence'] = pred_scores

    # Save results to the database
    database.add_email_records(df)
    print(f"Saved {len(df)} records from CSV to the database.")

    # Format for display
    df_display = df.copy()
    df_display['confidence'] = [f"{score:.2%}" for score in pred_scores]
    return df_display
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# --- History & Feedback Functions (Unchanged) ---
|
| 75 |
def show_history():
|
|
|
|
| 82 |
def save_feedback_and_refresh(record_id, corrected_label):
    """Persists a user's label correction and refreshes the history table.

    Returns a (history_dataframe, status_message) pair for the two Gradio
    outputs wired to the feedback submit button.
    """
    if not record_id or not corrected_label:
        return show_history(), "Please enter a Record ID and a correct label first."
    try:
        record_id_int = int(record_id)
    except (TypeError, ValueError):
        # A non-numeric ID would otherwise crash the click handler with an
        # unhandled ValueError; surface it as a status message instead.
        return show_history(), f"Record ID must be a whole number, got: {record_id!r}"
    database.update_feedback(record_id_int, corrected_label)
    return show_history(), f"Success! Record {record_id_int} updated."
|
| 88 |
|
| 89 |
+
# --- MODIFIED: Build the Final Gradio Interface ---
|
| 90 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
| 91 |
gr.Markdown("## 📧 Smart Email Sorter (Zero-Shot Model)")
|
| 92 |
|
| 93 |
+
with gr.Tab("Single Prediction"):
|
| 94 |
+
gr.Markdown("### Classify a Single Email")
|
| 95 |
+
subject_input = gr.Textbox(label="Subject", lines=1)
|
| 96 |
+
body_input = gr.Textbox(label="Body", lines=5, placeholder="Paste the email body here...")
|
| 97 |
+
predict_btn = gr.Button("Classify Email", variant="primary")
|
| 98 |
+
output_label = gr.Label(label="Result")
|
| 99 |
+
predict_btn.click(fn=predict_single, inputs=[subject_input, body_input], outputs=output_label, show_progress="full")
|
| 100 |
+
|
| 101 |
+
with gr.Tab("Batch Prediction (CSV)"):
|
| 102 |
+
gr.Markdown("### Classify a Batch of Emails from a CSV File")
|
| 103 |
+
csv_input = gr.File(label="Upload a CSV file with 'subject' and 'body' columns")
|
| 104 |
+
csv_btn = gr.Button("Classify Batch", variant="primary")
|
| 105 |
+
batch_output = gr.Dataframe(label="Classification Results", wrap=True)
|
| 106 |
+
csv_btn.click(fn=predict_batch_csv, inputs=csv_input, outputs=batch_output, show_progress="full")
|
| 107 |
|
| 108 |
with gr.Tab("History & Feedback"):
|
| 109 |
gr.Markdown("### Prediction History & Feedback")
|
|
|
|
| 118 |
history_df = gr.Dataframe(label="All Saved Predictions", wrap=True)
|
| 119 |
app.load(fn=show_history, outputs=history_df)
|
| 120 |
refresh_history_btn.click(fn=show_history, outputs=history_df)
|
| 121 |
+
submit_feedback_btn.click(fn=save_feedback_and_refresh, inputs=[record_id_input, feedback_label], outputs=[history_df, feedback_status])
|
|
|
|
|
|
|
|
|
|
|
|