Spaces:

IotaCluster
/

Summarizer

Paused

App Files Files Community

IotaCluster committed on Jul 8, 2025

Commit

424339c

verified ·

1 Parent(s): 4b4fd48

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -37

app.py CHANGED Viewed

@@ -1,55 +1,44 @@
 import re
 import gradio as gr
-from transformers import T5Tokenizer, T5ForConditionalGeneration
-# Load the model and tokenizer
-model_name = "t5-small"
-tokenizer = T5Tokenizer.from_pretrained(model_name)
-model = T5ForConditionalGeneration.from_pretrained(model_name)
-# Function to remove confidentiality notice
 def remove_confidentiality(text: str) -> str:
-    # Pattern matches the confidentiality notice starting with 'CONFIDENTIALITY NOTICE:'
     pattern = r"\*\*CONFIDENTIALITY NOTICE:.*"
-    # Split text at the notice and keep only the part before it
-    cleaned = re.split(pattern, text, flags=re.DOTALL)[0]
-    return cleaned.strip()
-# Define the summarization function
 def summarize_text(text):
-    # Clean the text by removing the confidentiality notice if present
-    cleaned_text = remove_confidentiality(text)
-    # Prepare input for summarization
-    input_text = "summarize: " + cleaned_text.strip()
-    input_ids = tokenizer.encode(
-        input_text,
-        return_tensors="pt",
-        max_length=500,
-        truncation=True
-    )
-    summary_ids = model.generate(
-        input_ids,
-        max_length=900,
-        min_length=800,
-        length_penalty=2.0,
-        num_beams=2,
         early_stopping=True
     )
-    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-    return summary
-# Gradio interface
 iface = gr.Interface(
     fn=summarize_text,
     inputs=gr.Textbox(lines=15, placeholder="Paste your text here..."),
     outputs=gr.Textbox(label="Summary"),
-    title="T5 Text Summarizer",
-    description="Enter any long English text to get a summarized version using the T5 model."
 )
-# Launch
-def main():
-    iface.launch()
 if __name__ == "__main__":
-    main()

 import re
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+# Choose the distilled BART checkpoint
+model_name = "sshleifer/distilbart-cnn-12-6"
+tokenizer  = AutoTokenizer.from_pretrained(model_name)
+model      = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Pre‑build the HF summarization pipeline (faster in repeated calls)
+summarizer = pipeline(
+    "summarization",
+    model=model,
+    tokenizer=tokenizer,
+    device=0  # set to -1 for CPU, or the GPU ID if available
+)
 def remove_confidentiality(text: str) -> str:
     pattern = r"\*\*CONFIDENTIALITY NOTICE:.*"
+    return re.split(pattern, text, flags=re.DOTALL)[0].strip()
 def summarize_text(text):
+    cleaned = remove_confidentiality(text)
+    # NOTE: `max_length`/`min_length` only bound the generated summary length; the pipeline does not chunk long inputs, so overly long text must be truncated or split by the caller
+    summary_list = summarizer(
+        cleaned,
+        max_length=200,   # shorter target length for punchier summaries
+        min_length=50,
+        length_penalty=1.5,
+        num_beams=3,
         early_stopping=True
     )
+    return summary_list[0]["summary_text"]
 iface = gr.Interface(
     fn=summarize_text,
     inputs=gr.Textbox(lines=15, placeholder="Paste your text here..."),
     outputs=gr.Textbox(label="Summary"),
+    title="Fast & Accurate Summarizer",
+    description="Using the distilled BART model for quicker, high-quality summaries."
 )
 if __name__ == "__main__":
+    iface.launch()