Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import torch | |
| from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification | |
| import joblib | |
| import gradio as gr | |
# -----------------------------
# Load trained transformer
# -----------------------------
# Directory holding the saved model weights, the tokenizer files and the
# pickled label encoder ("le.pkl").
model_path = "./models/transformer"

# Tokenizer and label encoder are independent of the model, so load them first.
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
le = joblib.load(f"{model_path}/le.pkl")

# Put the classifier into evaluation mode right after loading; this script
# only runs inference.
model = DistilBertForSequenceClassification.from_pretrained(model_path).eval()
# -----------------------------
# Prediction function for CSV
# -----------------------------
def predict_csv(file):
    """Classify every row of an uploaded CSV and return it with predictions.

    Args:
        file: The upload handed over by the Gradio ``File`` component. Either
            an object exposing the temporary file path as ``.name`` or the
            path itself as a string.

    Returns:
        The parsed DataFrame with an extra ``predicted_folder`` column holding
        the decoded label for each row.

    Raises:
        gr.Error: If nothing was uploaded, or the CSV lacks a ``subject`` or
            ``body`` column. Gradio shows the message to the user instead of
            trying to render a string in the Dataframe output.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both a tempfile-like wrapper (path in .name) and a plain path.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    if 'subject' not in df.columns or 'body' not in df.columns:
        raise gr.Error("CSV must have 'subject' and 'body' columns.")

    # Empty cells are parsed as NaN and numeric-looking cells as numbers;
    # normalise both to strings so the tokenizer always receives text.
    subjects = df['subject'].fillna("").astype(str)
    bodies = df['body'].fillna("").astype(str)
    texts = (subjects + " " + bodies).tolist()

    # Run the model on small batches rather than one row at a time; padding
    # only makes sense when several sequences share a tensor.
    batch_size = 32
    pred_ids = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
            logits = model(**inputs).logits
            pred_ids.extend(torch.argmax(logits, dim=1).tolist())

    # Decode all class ids in one call (le is presumably a scikit-learn
    # LabelEncoder -- confirm). Skip the call entirely for an empty CSV.
    df['predicted_folder'] = le.inverse_transform(pred_ids) if pred_ids else []
    return df  # Gradio will display as a table
# -----------------------------
# Gradio interface
# -----------------------------
# One file upload in, one table out; predict_csv does the work in between.
iface = gr.Interface(
    predict_csv,
    gr.File(label="Upload CSV"),
    gr.Dataframe(label="Predicted Folders"),
    title="Smart Email Sorter (Transformer) - CSV Upload",
    description="Upload a CSV with 'subject' and 'body' columns to predict email folders.",
)

# -----------------------------
# Launch
# -----------------------------
# share=True asks Gradio for a public share link in addition to the local URL.
# NOTE(review): hosted platforms may ignore or warn about this flag -- confirm
# whether it is still needed where this app is deployed.
iface.launch(share=True)