"""Gradio app that summarizes pasted text with an ONNX export of BART-large-CNN."""

import gradio as gr
from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer, pipeline

# Hub repository that supplies both the ONNX model files and the tokenizer.
MODEL_ID = "onnx-community/bart-large-cnn-ONNX"

# Instruction text prepended to every input before it reaches the model.
# NOTE(review): BART-large-CNN is presumably not instruction-tuned, so this
# prefix may just be treated as part of the article - confirm it helps.
SUMMARY_PROMPT = (
    "Summarize the key events, including casualties and political context:\n"
)


def create_fast_summarizer():
    """Build the summarization pipeline backed by ONNX Runtime on CPU.

    Loads the ``*_q4.onnx`` encoder/decoder files (presumably 4-bit quantized,
    judging by the file names - TODO confirm) from ``MODEL_ID`` together with
    the matching fast tokenizer.

    Returns:
        A ``transformers`` "summarization" pipeline wrapping the ONNX model.
    """
    model = ORTModelForSeq2SeqLM.from_pretrained(
        MODEL_ID,
        encoder_file_name="encoder_model_q4.onnx",
        decoder_file_name="decoder_model_q4.onnx",
        provider="CPUExecutionProvider",
        use_io_binding=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
    return pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
        device=-1,  # -1 selects CPU, matching the CPU execution provider
    )


# Load the model once at start-up so every request reuses the same pipeline.
summarizer = create_fast_summarizer()


def summarize_text(text: str) -> str:
    """Summarize a news story or article into a short paragraph.

    Args:
        text: The article or news text to summarize.

    Returns:
        The generated summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    # Without this guard the model would "summarize" the bare instruction
    # prefix, and min_length would force it to invent at least 55 tokens.
    if not text or not text.strip():
        return ""

    result = summarizer(
        SUMMARY_PROMPT + text,
        # Clip over-long articles to the tokenizer's maximum input length
        # instead of feeding the model more tokens than it supports.
        truncation=True,
        max_length=160,
        min_length=55,
        do_sample=False,  # deterministic beam search rather than sampling
        num_beams=6,
        length_penalty=1.5,
        no_repeat_ngram_size=3,  # prevent repetition
        clean_up_tokenization_spaces=True,
        early_stopping=True,
    )
    return result[0]["summary_text"]


# Build the Gradio interface.
app = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(lines=15, placeholder="Paste your text here..."),
    outputs="text",
    title="ONNX Summarizer 🚀",
    description=(
        "Paste any news or article text and get a concise, "
        "context-rich summary."
    ),
)

app.launch(mcp_server=True, share=True)