Spaces:

Azidan
/

text-sum

Sleeping

App Files Files Community

Azidan committed on Jan 12

Commit

542de0c

verified ·

1 Parent(s): ae8ae80

Create app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import io
+import os
+import gradio as gr
+import pdfplumber
+import torch
+from docx import Document
+from transformers import pipeline
+# Load the summarization model once
+device = 0 if torch.cuda.is_available() else -1
+print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
+summarizer = pipeline(
+    "summarization",
+    model="sshleifer/distilbart-cnn-12-6",  # Fast & good for CPU; change to "facebook/bart-large-cnn" if you get GPU
+    device=device
+)
+def extract_text(file):
+    if file is None:
+        return ""
+    filename = file.name.lower()
+    content = file.read()
+    try:
+        if filename.endswith('.pdf'):
+            with pdfplumber.open(io.BytesIO(content)) as pdf:
+                return "\n".join(page.extract_text() or "" for page in pdf.pages)
+        elif filename.endswith('.docx'):
+            doc = Document(io.BytesIO(content))
+            return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
+        elif filename.endswith('.txt'):
+            return content.decode('utf-8', errors='replace')
+        else:
+            return "Unsupported file. Please use .pdf, .docx, or .txt"
+    except Exception as e:
+        return f"Error reading file: {str(e)}"
+def summarize(input_text, file, detail_level):
+    if file is not None:
+        text = extract_text(file)
+    else:
+        text = input_text.strip()
+    if not text:
+        return "Please paste text or upload a valid lecture file."
+    words = len(text.split())
+    if words < 100:
+        return text  # Too short → return as-is
+    # Convert slider (0.15 to 0.60) to target length ratio
+    target_ratio = detail_level
+    target_length = int(words * target_ratio)
+    max_l = max(500, min(1400, target_length + 250))
+    min_l = max(300, int(target_length * 0.65))
+    try:
+        result = summarizer(
+            text,
+            max_length=max_l,
+            min_length=min_l,
+            length_penalty=1.8,
+            num_beams=4,
+            early_stopping=True,
+            do_sample=False,
+            truncation=True
+        )
+        return result[0]['summary_text']
+    except Exception as e:
+        return f"Error during summarization: {str(e)}\n(Try shorter text or lower detail level)"
+# Create Gradio interface
+interface = gr.Interface(
+    fn=summarize,
+    inputs=[
+        gr.Textbox(
+            lines=12,
+            placeholder="Paste your lecture text here (or use the upload below)...",
+            label="Lecture Text (Paste)"
+        ),
+        gr.File(
+            file_types=[".pdf", ".docx", ".txt"],
+            label="Upload Lecture File"
+        ),
+        gr.Slider(
+            minimum=0.15,
+            maximum=0.60,
+            value=0.32,
+            step=0.01,
+            label="Detail Level (higher = longer, more detailed summary)"
+        )
+    ],
+    outputs=gr.Textbox(label="Generated Summary"),
+    title="Lecture Summarizer",
+    description="Upload a lecture file (PDF/DOCX/TXT) or paste text. Adjust the slider for shorter or more detailed summaries.",
+    theme="soft",
+    allow_flagging="never",
+    examples=[
+        ["Sample lecture: Photosynthesis is the process by which plants convert light energy into chemical energy...", None, 0.40],
+        [None, "example_lecture.pdf", 0.30]  # You can upload a sample later if wanted
+    ]
+)
+interface.launch()