"""Gradio app exposing a text-classification model through a simple text-in/text-out UI."""

import gradio as gr
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("iam-tsr/finetuned-distilbert-employ-feedback")

# Load Quantized ONNX model
onnx_filename = "model_qint8.onnx"
model = ORTModelForSequenceClassification.from_pretrained(
    "iam-tsr/finetuned-distilbert-employ-feedback",
    file_name=onnx_filename,
)

# Cache of already-built pipelines, keyed by the identity of the
# (model, tokenizer) pair. Each cached pipeline holds references to its model
# and tokenizer, so the ids used as keys stay valid for as long as the entry
# exists.
_PIPELINES = {}


def _get_pipeline(model, tokenizer):
    """Return a text-classification pipeline for the pair, building it at most once."""
    key = (id(model), id(tokenizer))
    pipe = _PIPELINES.get(key)
    if pipe is None:
        pipe = pipeline(
            task="text-classification",
            model=model,
            tokenizer=tokenizer,
            device="cpu",
        )
        _PIPELINES[key] = pipe
    return pipe


def pred(model, tokenizer, text):
    """Classify ``text`` and return the predicted label.

    Args:
        model: Sequence-classification model handed to ``transformers.pipeline``.
        tokenizer: Tokenizer handed to ``transformers.pipeline``.
        text: Input text to classify.

    Returns:
        The ``'label'`` field of the first result produced by the pipeline.
    """
    # Reuse the pipeline across calls instead of rebuilding it per request.
    pipe = _get_pipeline(model, tokenizer)
    return pipe(text)[0]['label']


demo = gr.Interface(
    # Bind the module-level model and tokenizer so the UI only supplies the text.
    fn=lambda text: pred(model, tokenizer, text),
    inputs=["text"],
    outputs=["text"],
    api_name="predict",
)

# share=True asks Gradio to also create a public share link for the app.
demo.launch(share=True)