"""Gradio app: predict an email folder for every row of an uploaded CSV.

The script loads a fine-tuned DistilBERT sequence classifier, its tokenizer
and a pickled label encoder from ``model_path``, then serves a small Gradio
UI. The UI accepts a CSV with ``subject`` and ``body`` columns and returns
the same table with an added ``predicted_folder`` column.
"""
import pandas as pd
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import joblib
import gradio as gr

# -----------------------------
# Load trained transformer
# -----------------------------
model_path = "./models/transformer"
model = DistilBertForSequenceClassification.from_pretrained(model_path)
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
# SECURITY NOTE: joblib.load unpickles arbitrary objects. Only load an
# ``le.pkl`` that was produced by a trusted training run.
# ``le`` is presumably a fitted scikit-learn LabelEncoder (it is only used
# through ``inverse_transform`` below) -- TODO confirm against training code.
le = joblib.load(f"{model_path}/le.pkl")
model.eval()  # inference mode: disables dropout

# Number of emails tokenized and classified per forward pass.
BATCH_SIZE = 32
# Columns that must be present in the uploaded CSV.
REQUIRED_COLUMNS = ("subject", "body")


# -----------------------------
# Prediction function for CSV
# -----------------------------
def predict_csv(file):
    """Classify every email in an uploaded CSV and return the labelled table.

    Args:
        file: The upload handed over by ``gr.File``. Either an object with a
            ``name`` attribute holding the file path, or the path itself.

    Returns:
        pandas.DataFrame: The CSV contents with an extra ``predicted_folder``
        column holding the label decoded by ``le.inverse_transform``.

    Raises:
        gr.Error: If no file was uploaded, or the CSV lacks the ``subject``
            or ``body`` column. Gradio shows the message to the user.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both a file-like wrapper (with ``.name``) and a bare path string.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    if any(column not in df.columns for column in REQUIRED_COLUMNS):
        # Raising (rather than returning a string) lets Gradio display the
        # message; a plain string cannot be rendered by the Dataframe output.
        raise gr.Error("CSV must have 'subject' and 'body' columns.")

    # Empty cells are read as NaN and non-text cells as numbers; normalise
    # both to strings so concatenation and tokenization cannot fail on them.
    subjects = df["subject"].fillna("").astype(str)
    bodies = df["body"].fillna("").astype(str)
    texts = (subjects + " " + bodies).tolist()

    predictions = []
    for start in range(0, len(texts), BATCH_SIZE):
        batch = texts[start:start + BATCH_SIZE]
        # padding=True pads each batch to its longest sequence; truncation
        # caps inputs at the tokenizer's configured maximum length.
        inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
        with torch.no_grad():
            logits = model(**inputs).logits
        pred_ids = torch.argmax(logits, dim=1).tolist()
        predictions.extend(le.inverse_transform(pred_ids))

    df["predicted_folder"] = predictions
    return df  # Gradio will display as a table


# -----------------------------
# Gradio interface
# -----------------------------
iface = gr.Interface(
    fn=predict_csv,
    inputs=gr.File(label="Upload CSV"),
    outputs=gr.Dataframe(label="Predicted Folders"),
    title="Smart Email Sorter (Transformer) - CSV Upload",
    description="Upload a CSV with 'subject' and 'body' columns to predict email folders.",
)

# -----------------------------
# Launch
# -----------------------------
# NOTE: share=True requests a publicly reachable URL for this app; uploaded
# email content will be processed by whoever can reach that link.
iface.launch(share=True)