Spaces:

Suryacoder
/

Smart-Email-Sorter

Sleeping

Smart-Email-Sorter / backend /gradio_app.py

Surya8663

Final version, database correctly ignored

4ded330 4 months ago

1.73 kB

	import pandas as pd
	import torch
	from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
	import joblib
	import gradio as gr

	# ------------------------------------------------------------------
	# Restore the trained DistilBERT classifier, its tokenizer and the
	# label object used to turn class ids back into folder names
	# ------------------------------------------------------------------
	model_path = "./models/transformer"  # relative to the current working directory

	# Inference only: switch to evaluation mode as soon as the weights are loaded
	# (nn.Module.eval() returns the module itself, so the call can be chained).
	model = DistilBertForSequenceClassification.from_pretrained(model_path).eval()
	tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)

	# `le` is only used through le.inverse_transform(...) in predict_csv;
	# presumably a fitted scikit-learn LabelEncoder saved at training time -- confirm.
	le = joblib.load(f"{model_path}/le.pkl")

	# -----------------------------
	# Prediction function for CSV
	# -----------------------------
	def predict_csv(file):
	    """Classify every email in an uploaded CSV and return the annotated table.

	    Args:
	        file: Value supplied by the ``gr.File`` input. Depending on the Gradio
	            version this is either a plain path string or an object exposing
	            the path as ``.name``; it is ``None`` when nothing was uploaded.

	    Returns:
	        The parsed DataFrame with an extra ``predicted_folder`` column holding
	        the label produced by ``le.inverse_transform`` for each row.

	    Raises:
	        gr.Error: If no file was uploaded, or the CSV lacks a ``subject`` or
	            ``body`` column. Gradio shows the message to the user instead of
	            trying to render a string in the Dataframe output.
	    """
	    if file is None:
	        raise gr.Error("Please upload a CSV file.")

	    # Accept both the path-string form and the file-wrapper form of the upload.
	    csv_path = file if isinstance(file, str) else file.name
	    df = pd.read_csv(csv_path)
	    if 'subject' not in df.columns or 'body' not in df.columns:
	        raise gr.Error("CSV must have 'subject' and 'body' columns.")

	    # Empty cells are read as NaN; fill and coerce so every row is a real string
	    # (NaN + str would otherwise yield NaN and crash the tokenizer).
	    subjects = df['subject'].fillna("").astype(str)
	    bodies = df['body'].fillna("").astype(str)
	    texts = (subjects + " " + bodies).tolist()

	    predictions = []
	    batch_size = 32  # rows per forward pass; keeps memory bounded on large files
	    with torch.no_grad():
	        for start in range(0, len(texts), batch_size):
	            batch = texts[start:start + batch_size]
	            # padding=True pads to the longest text in the batch; the attention
	            # mask returned by the tokenizer tells the model to ignore padding.
	            inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
	            logits = model(**inputs).logits
	            pred_ids = torch.argmax(logits, dim=1).tolist()
	            predictions.extend(le.inverse_transform(pred_ids))

	    df['predicted_folder'] = predictions
	    return df  # Gradio will display as a table

	# ------------------------------------------------
	# Web UI: one CSV upload in, one results table out
	# ------------------------------------------------
	csv_input = gr.File(label="Upload CSV")
	folder_table = gr.Dataframe(label="Predicted Folders")

	iface = gr.Interface(
	    fn=predict_csv,
	    inputs=csv_input,
	    outputs=folder_table,
	    title="Smart Email Sorter (Transformer) - CSV Upload",
	    description="Upload a CSV with 'subject' and 'body' columns to predict email folders.",
	)

	# ------------------------------------------------
	# Start the app as soon as this file is executed.
	# share=True additionally requests a public Gradio share link.
	# ------------------------------------------------
	iface.launch(share=True)