import pandas as pd
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import joblib
import gradio as gr

# -----------------------------
# Load trained transformer
# -----------------------------
# Single directory that the model, the tokenizer, and le.pkl are all read from.
model_path = "./models/transformer"

# Module.eval() returns the module itself, so chaining it here still binds the
# loaded model while switching it to inference mode.
model = DistilBertForSequenceClassification.from_pretrained(model_path).eval()
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)

# Object used below to map predicted class ids back to their original labels.
le = joblib.load(model_path + "/le.pkl")

# -----------------------------
# Prediction function for CSV
# -----------------------------
def predict_csv(file):
    """Predict an email folder for every row of an uploaded CSV.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either a
            plain path string or an object exposing the path as ``.name``
            is accepted.

    Returns:
        The parsed DataFrame with an added ``predicted_folder`` column that
        holds the decoded label for each row.

    Raises:
        gr.Error: If no file was uploaded, or the CSV lacks the ``subject``
            or ``body`` column.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Depending on the Gradio version / File `type` setting, the callback
    # receives either a path string or a tempfile-like wrapper with `.name`.
    csv_path = file if isinstance(file, str) else file.name
    df = pd.read_csv(csv_path)

    if 'subject' not in df.columns or 'body' not in df.columns:
        # Raise instead of returning the message: the output component is a
        # Dataframe, so a returned string would not be shown as an error.
        raise gr.Error("CSV must have 'subject' and 'body' columns.")

    # Empty cells are parsed as NaN and numeric-looking columns as numbers;
    # normalise both to plain strings so every row yields valid tokenizer input.
    subjects = df['subject'].fillna("").astype(str)
    bodies = df['body'].fillna("").astype(str)
    texts = (subjects + " " + bodies).tolist()

    # Run the model on small batches rather than one row at a time; padding
    # aligns the sequences inside each batch.
    batch_size = 32
    pred_ids = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
            logits = model(**inputs).logits
            pred_ids.extend(torch.argmax(logits, dim=1).tolist())

    # Decode all class ids in a single call; skip the encoder for an empty CSV.
    df['predicted_folder'] = le.inverse_transform(pred_ids) if pred_ids else []
    return df  # Gradio will display as a table

# -----------------------------
# Gradio interface
# -----------------------------
# One file upload in, one table out; predict_csv does all the work in between.
iface = gr.Interface(
    title="Smart Email Sorter (Transformer) - CSV Upload",
    description="Upload a CSV with 'subject' and 'body' columns to predict email folders.",
    fn=predict_csv,
    inputs=gr.File(label="Upload CSV"),
    outputs=gr.Dataframe(label="Predicted Folders"),
)

# -----------------------------
# Launch
# -----------------------------
# Starts the app as soon as this module runs; share=True is forwarded to
# Gradio's launch() (see its documentation for the sharing behaviour).
iface.launch(share=True)