IotaCluster committed on
Commit
649792d
·
verified ·
1 Parent(s): feedcad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -70
app.py CHANGED
@@ -1,71 +1,25 @@
1
- import re
2
  import gradio as gr
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
-
5
- # Use a stronger instruction-tuned model
6
- # Options: "google/flan-t5-base", "google/flan-t5-large" (ensure your Space has enough RAM/GPU)
7
- MODEL_NAME = "google/flan-t5-large"
8
-
9
-
10
- def load_cleaner():
11
- """
12
- Lazily load the tokenizer and model to avoid startup caching issues.
13
- """
14
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
15
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
16
- return tokenizer, model
17
-
18
- # Initialize as None; will be loaded on first request
19
- tokenizer, model = None, None
20
-
21
-
22
- def clean_email(raw_text: str) -> str:
23
- global tokenizer, model
24
- # Lazy load
25
- if tokenizer is None or model is None:
26
- tokenizer, model = load_cleaner()
27
-
28
- # Remove confidentiality notice
29
- raw_text = re.sub(
30
- r"\*\*CONFIDENTIALITY NOTICE:[\s\S]*$", "", raw_text,
31
- flags=re.IGNORECASE
32
- )
33
- # Build prompt
34
- prompt = (
35
- "Please rewrite this email so that all signatures, forwarded headers, "
36
- "image placeholders, social‑media links, and confidentiality footers are removed. "
37
- "Preserve only the core message:\n\n" + raw_text
38
- )
39
- # Tokenize input (up to 1024 tokens)
40
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
41
-
42
- # Generate cleaned output (minimum and maximum 1024 tokens)
43
- outputs = model.generate(
44
- **inputs,
45
- max_length=1024,
46
- min_length=1024,
47
- num_beams=5,
48
- early_stopping=True
49
- )
50
- # Decode
51
- cleaned = tokenizer.decode(outputs[0], skip_special_tokens=True)
52
- return cleaned
53
-
54
- # Build Gradio interface
55
- def main():
56
- with gr.Blocks() as demo:
57
- gr.Markdown(
58
- "# Email Cleaner"
59
- "\nPaste your raw email below and click **Clean**—signatures, headers, links, and footers will be stripped out."
60
- )
61
- with gr.Row():
62
- inp = gr.Textbox(lines=15, placeholder="Paste raw email here...", label="Raw Email")
63
- out = gr.Textbox(lines=40, label="Cleaned Email (1024 tokens minimum)")
64
- btn = gr.Button("Clean")
65
- btn.click(fn=clean_email, inputs=inp, outputs=out)
66
-
67
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)
68
-
69
-
70
- if __name__ == "__main__":
71
- main()
 
 
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Checkpoint used for summarization. Loaded once at import time so that
# every request served by the app reuses the same tokenizer/model pair.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
8
+
9
# Define the summarization function
def summarize_text(text):
    """Summarize English prose with the module-level T5 model.

    Args:
        text: The raw input text to summarize (str).

    Returns:
        The generated summary as a string. Blank or whitespace-only input
        returns "" — without this guard, ``min_length=40`` would force the
        model to emit ~40 tokens of noise from the bare task prefix.
    """
    stripped = text.strip()
    if not stripped:
        return ""
    # T5 is a text-to-text model: the task is selected via a prompt prefix.
    input_text = "summarize: " + stripped
    # Truncate long inputs; t5-small's context window is 512 tokens, so 500
    # leaves room for the prefix and special tokens.
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", max_length=500, truncation=True
    )
    # Beam search with a length penalty biases toward reasonably long,
    # coherent summaries between 40 and 140 tokens.
    summary_ids = model.generate(
        input_ids,
        max_length=140,
        min_length=40,
        length_penalty=2.0,
        num_beams=2,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary
16
+
17
# Assemble the Gradio UI: one input box, one output box, wired to the
# summarizer above.
summary_input = gr.Textbox(lines=15, placeholder="Paste your text here...")
summary_output = gr.Textbox(label="Summary")

iface = gr.Interface(
    fn=summarize_text,
    inputs=summary_input,
    outputs=summary_output,
    title="T5 Text Summarizer",
    description="Enter any long English text to get a summarized version using the T5 model.",
)

# Start the web server (blocks until shutdown).
iface.launch()