# Spaces status: Sleeping
| import re | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# Checkpoint id of the distilled BART model; the tokenizer and the weights
# are both loaded from this same id.
model_name = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Build the summarization pipeline once at import time so every request
# reuses the already-loaded model and tokenizer.
summarizer = pipeline(
    task="summarization",
    tokenizer=tokenizer,
    model=model,
    truncation=True,
    device=-1,  # -1 runs on CPU; pass a GPU index instead if one is available
)
def remove_confidentiality(text: str) -> str:
    """Return *text* with any trailing confidentiality notice cut off.

    Everything from the first ``**CONFIDENTIALITY NOTICE:`` marker to the end
    of the input is discarded, and the remaining text is stripped of leading
    and trailing whitespace. Text without the marker is only stripped.
    """
    notice_re = r"\*\*CONFIDENTIALITY NOTICE:.*"
    hit = re.search(notice_re, text, flags=re.DOTALL)
    if hit is None:
        kept = text
    else:
        # The match runs to the end of the input, so keep only what precedes it.
        kept = text[:hit.start()]
    return kept.strip()
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Any trailing confidentiality notice is removed first via
    ``remove_confidentiality``.

    Args:
        text: Raw text to summarize. ``None`` and empty strings are accepted.

    Returns:
        The generated summary, or ``""`` when there is nothing to summarize
        (empty input, or input consisting only of a confidentiality notice).
    """
    # Guard before cleaning: the regex helper cannot handle None.
    if not text:
        return ""

    cleaned = remove_confidentiality(text)
    if not cleaned:
        # Nothing left after cleaning; skip the model rather than let it
        # generate a summary from empty input.
        return ""

    # NOTE: the pipeline does not chunk long inputs. Input longer than the
    # model's maximum length is truncated (the pipeline is built with
    # truncation=True), so only the leading part of a long text is summarized.
    summary_list = summarizer(
        cleaned,
        max_length=250,  # upper bound on generated summary tokens
        min_length=50,   # lower bound on generated summary tokens
        length_penalty=1.0,
        num_beams=3,
        # `temperature` was dropped: it only applies when sampling
        # (do_sample=True) and is ignored by beam search.
        early_stopping=True,
    )
    return summary_list[0]["summary_text"]
# Gradio front end: a multi-line input box feeds summarize_text and the
# returned string is shown in the "Summary" box.
iface = gr.Interface(
    title="Fast & Accurate Summarizer",
    description="Using the distilled BART model for quicker, high-quality summaries.",
    fn=summarize_text,
    inputs=gr.Textbox(placeholder="Paste your text here...", lines=15),
    outputs=gr.Textbox(label="Summary"),
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()