# Smart-Email-Sorter / backend /gradio_app.py
# Surya8663
# Final version, database correctly ignored
# 4ded330
import pandas as pd
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import joblib
import gradio as gr
# -----------------------------
# Load trained transformer
# -----------------------------
# Directory that holds the saved model, the tokenizer files and le.pkl.
model_path = "./models/transformer"

# Module.eval() returns the module itself, so the model can be loaded and
# switched to inference mode in a single expression.
model = DistilBertForSequenceClassification.from_pretrained(model_path).eval()
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)

# Object used below to map predicted class ids back to labels via
# inverse_transform (presumably a fitted label encoder; confirm against the
# training script).
le = joblib.load(f"{model_path}/le.pkl")
# -----------------------------
# Prediction function for CSV
# -----------------------------
def _predict_labels(texts, batch_size=32):
    """Classify ``texts`` in mini-batches and return the decoded labels.

    Args:
        texts: List of strings to classify.
        batch_size: Number of texts tokenised and run through the model
            per forward pass.

    Returns:
        A list with one decoded label per input text, in input order.
    """
    labels = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # padding=True pads every text of the batch to the longest one so
        # they can be stacked into a single tensor.
        inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits
        pred_ids = torch.argmax(logits, dim=1).tolist()
        labels.extend(le.inverse_transform(pred_ids))
    return labels


def predict_csv(file):
    """Predict a folder for every email row of an uploaded CSV.

    Args:
        file: The value delivered by the ``gr.File`` input. Either an object
            exposing the path as ``.name`` or a plain path string; ``None``
            when nothing was uploaded.

    Returns:
        The parsed DataFrame with an added ``predicted_folder`` column.

    Raises:
        gr.Error: If no file was uploaded, the file cannot be parsed as CSV,
            or the ``subject`` / ``body`` columns are missing. Errors are
            raised rather than returned because the output component is a
            Dataframe, for which a bare message string is not a valid value.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both a file-like wrapper (with .name) and a bare path string.
    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        raise gr.Error(f"Could not read the uploaded CSV: {err}") from err

    if 'subject' not in df.columns or 'body' not in df.columns:
        raise gr.Error("CSV must have 'subject' and 'body' columns.")

    # Missing cells are read as NaN; without fillna the concatenation would
    # yield NaN (a float), which is not valid tokenizer input.
    subjects = df['subject'].fillna("").astype(str)
    bodies = df['body'].fillna("").astype(str)
    texts = (subjects + " " + bodies).tolist()

    df['predicted_folder'] = _predict_labels(texts)
    return df  # Gradio will display as a table
# -----------------------------
# Gradio interface
# -----------------------------
# Build the two UI components first, then wire them around predict_csv.
_csv_upload = gr.File(label="Upload CSV")
_result_table = gr.Dataframe(label="Predicted Folders")

iface = gr.Interface(
    fn=predict_csv,
    inputs=_csv_upload,
    outputs=_result_table,
    title="Smart Email Sorter (Transformer) - CSV Upload",
    description="Upload a CSV with 'subject' and 'body' columns to predict email folders.",
)
# -----------------------------
# Launch
# -----------------------------
# Top-level statement: this runs whenever the module is executed or imported.
# NOTE(review): per the Gradio docs, share=True also requests a temporary
# public link to this app - confirm that exposing the model is intended.
iface.launch(share=True)