Spaces:

Azidan
/

textSum

Sleeping

App Files Files Community

Azidan committed on Jan 18

Commit

8b8dafc

verified ·

1 Parent(s): b91ee99

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -30

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from PyPDF2 import PdfReader
 # Model setup (CPU-safe)
 # =========================
 MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 summarizer = pipeline(
     "summarization",
@@ -17,8 +16,7 @@ summarizer = pipeline(
 )
 MAX_MODEL_TOKENS = 1024
-CHUNK_SIZE = 900  # safe margin
 # =========================
 # Utilities
@@ -27,21 +25,17 @@ def clean_text(text: str) -> str:
     """Fix quotes, spacing, repetition, and broken punctuation."""
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
     text = re.sub(r"[']{2,}", "'", text)
     text = re.sub(r"\s+", " ", text)
     sentences = re.split(r'(?<=[.!?])\s+', text)
     seen = set()
     result = []
     for s in sentences:
         key = s.strip().lower()
         if key and key not in seen:
             seen.add(key)
             result.append(s.strip())
     return " ".join(result)
@@ -49,12 +43,10 @@ def chunk_text(text: str):
     """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
         chunk_tokens = tokens[i:i + CHUNK_SIZE]
         chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
         chunks.append(chunk_text)
     return chunks
@@ -62,10 +54,10 @@ def summarize_long_text(text: str) -> str:
     """Summarize arbitrarily long text safely."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     chunks = chunk_text(text)
     summaries = []
     for chunk in chunks:
         summary = summarizer(
             chunk,
@@ -73,9 +65,8 @@ def summarize_long_text(text: str) -> str:
             min_length=40,
             do_sample=False
         )[0]["summary_text"]
         summaries.append(summary)
     merged = " ".join(summaries)
     return clean_text(merged)
@@ -96,42 +87,93 @@ def read_pdf(file) -> str:
 def process_input(text, file):
     """Summarize the uploaded file when one is given, otherwise the pasted text.

     If ``file`` is not None, the text returned by ``read_pdf(file)`` is used
     in place of ``text``. The chosen text is passed to
     ``summarize_long_text`` and its result is returned unchanged.
     """
     source = text if file is None else read_pdf(file)
     return summarize_long_text(source)
 # =========================
 # Gradio UI
 # =========================
-with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Long Text Summarizer (Free-Tier Safe)")
     gr.Markdown(
-        "• Handles **thousands of words**\n"
-        "• Supports **PDF upload**\n"
-        "• Optimized for **CPU / free tier**"
     )
-    text_input = gr.Textbox(
-        lines=15,
-        label="Paste text (optional)"
     )
-    file_input = gr.File(
-        label="Upload PDF (optional)",
-        file_types=[".pdf"]
-    )
     output = gr.Textbox(
         lines=10,
-        label="Summary"
     )
-    summarize_btn = gr.Button("Summarize")
     summarize_btn.click(
         fn=process_input,
         inputs=[text_input, file_input],
         outputs=output
     )
 demo.launch()

 # Model setup (CPU-safe)
 # =========================
 MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 summarizer = pipeline(
     "summarization",
 )
 MAX_MODEL_TOKENS = 1024
+CHUNK_SIZE = 900   # safe margin
 # =========================
 # Utilities
     """Fix quotes, spacing, repetition, and broken punctuation."""
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
     text = re.sub(r"[']{2,}", "'", text)
     text = re.sub(r"\s+", " ", text)
     sentences = re.split(r'(?<=[.!?])\s+', text)
     seen = set()
     result = []
     for s in sentences:
         key = s.strip().lower()
         if key and key not in seen:
             seen.add(key)
             result.append(s.strip())
     return " ".join(result)
     """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
         chunk_tokens = tokens[i:i + CHUNK_SIZE]
         chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
         chunks.append(chunk_text)
     return chunks
     """Summarize arbitrarily long text safely."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     chunks = chunk_text(text)
     summaries = []
     for chunk in chunks:
         summary = summarizer(
             chunk,
             min_length=40,
             do_sample=False
         )[0]["summary_text"]
         summaries.append(summary)
     merged = " ".join(summaries)
     return clean_text(merged)
 def process_input(text, file):
     """Summarize the uploaded file when one is given, otherwise the pasted text.

     If ``file`` is not None, the text returned by ``read_pdf(file)`` is used
     in place of ``text``. The chosen text is passed to
     ``summarize_long_text`` and its result is returned unchanged.
     """
     source = text if file is None else read_pdf(file)
     return summarize_long_text(source)
+# =========================
+# Custom theme + CSS
+# =========================
+custom_theme = gr.themes.Default(
+    primary_hue="blue",
+    secondary_hue="gray",
+    neutral_hue="gray",
+    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'sans-serif'],
+).set(
+    body_background_fill="#ffffff",
+    body_background_fill_dark="#ffffff",
+    block_background_fill="#ffffff",
+    block_background_fill_dark="#ffffff",
+    button_primary_background_fill="#2563eb",          # nice blue
+    button_primary_background_fill_hover="#1d4ed8",    # darker on hover
+    button_primary_text_color="#ffffff",
+    button_primary_border_color="#2563eb",
+)
+custom_css = """
+.gradio-container {
+    background-color: #ffffff !important;
+}
+header, .gr-top, .gr-header {
+    background-color: #f8f9fa !important;  /* light gray navbar-like bar */
+    border-bottom: 1px solid #e5e7eb !important;
+    padding: 12px 24px !important;
+}
+.gr-button-primary {
+    border-radius: 8px !important;
+    font-weight: 600 !important;
+}
+h1 {
+    margin: 0 !important;
+    color: #1f2937 !important;
+}
+"""
 # =========================
 # Gradio UI
 # =========================
+with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
     gr.Markdown(
+        "# 📄 Long Text Summarizer (Free-Tier Safe)",
+        elem_classes=["pb-2"]
     )
+    gr.Markdown(
+        "• Handles **thousands of words**  \n"
+        "• Supports **PDF upload**  \n"
+        "• Optimized for **CPU / free tier**",
+        elem_classes=["text-gray-600", "text-sm", "mb-6"]
     )
+    with gr.Row():
+        with gr.Column(scale=5):
+            text_input = gr.Textbox(
+                lines=15,
+                label="Paste text (optional)",
+                placeholder="Paste your long article / text here...",
+            )
+        with gr.Column(scale=1, min_width=240):
+            file_input = gr.File(
+                label="Upload PDF (optional)",
+                file_types=[".pdf"],
+            )
+    summarize_btn = gr.Button("Summarize", variant="primary", scale=0)
     output = gr.Textbox(
         lines=10,
+        label="Summary",
+        placeholder="Summary will appear here...",
     )
     summarize_btn.click(
         fn=process_input,
         inputs=[text_input, file_input],
         outputs=output
     )
+# During development, launch with debug=True instead of share=True
 demo.launch()
+# demo.launch(server_name="0.0.0.0")   # ← use this on HF Spaces if needed