textSum / app.py
Azidan's picture
Update app.py
2f00a52 verified
raw
history blame
4.82 kB
import gradio as gr
import re
from transformers import pipeline, AutoTokenizer
from PyPDF2 import PdfReader
# =========================
# Model setup (CPU-safe)
# =========================
# Checkpoint shared by the tokenizer and the summarization pipeline.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

# Number of tokens per chunk handed to the summarizer. Described as a
# "safe margin"; presumably headroom below the model's input limit --
# TODO confirm against the checkpoint's configuration.
CHUNK_SIZE = 900

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# device=-1 keeps inference on the CPU.
summarizer = pipeline(
    task="summarization",
    model=MODEL_NAME,
    tokenizer=tokenizer,
    device=-1,
)
# =========================
# Utilities
# =========================
# Every spelling that is normalized to a plain ASCII quote. The first entry of
# each tuple is the mis-decoded sequence the original implementation matched;
# it is kept so text containing it is still cleaned the same way.
_SINGLE_QUOTE_FORMS = (
    "β€˜",
    "\u2018",  # left single quotation mark
    "’",  # right single quotation mark
)
_DOUBLE_QUOTE_FORMS = (
    "β€œ",
    "\u201c",  # left double quotation mark
    "”",  # right double quotation mark
)


def clean_text(text: str) -> str:
    """Normalize quotes, punctuation and spacing, then drop repeated sentences.

    Steps, in order:

    1. Typographic single/double quotes (opening and closing) become ``'``
       and ``"``.
    2. Runs of two or more ``.`` collapse to one ``.``; runs of two or more
       ``'`` collapse to one ``'``.
    3. Every run of whitespace becomes a single space.
    4. The text is split after ``.``, ``!`` or ``?`` followed by whitespace,
       and any sentence already seen (compared case-insensitively after
       stripping) is discarded. The first occurrence keeps its casing.

    Args:
        text: Raw text to clean.

    Returns:
        The surviving sentences joined by single spaces; ``""`` when nothing
        but whitespace remains.
    """
    for variant in _SINGLE_QUOTE_FORMS:
        text = text.replace(variant, "'")
    for variant in _DOUBLE_QUOTE_FORMS:
        text = text.replace(variant, '"')

    text = re.sub(r"[.]{2,}", ".", text)
    text = re.sub(r"[']{2,}", "'", text)
    text = re.sub(r"\s+", " ", text)

    # dict preserves insertion order, so the first spelling of each sentence
    # wins and the original ordering is kept.
    first_seen = {}
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        stripped = sentence.strip()
        if stripped:
            first_seen.setdefault(stripped.lower(), stripped)
    return " ".join(first_seen.values())
def chunk_text(text: str):
    """Split ``text`` into pieces of at most ``CHUNK_SIZE`` tokens each.

    The text is encoded once with the module-level ``tokenizer`` (without
    special tokens), the token ids are cut into consecutive windows of
    ``CHUNK_SIZE`` ids, and each window is decoded back into a string.

    Returns:
        A list of decoded chunk strings, in document order. An input that
        encodes to zero tokens yields an empty list.
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    window_starts = range(0, len(token_ids), CHUNK_SIZE)
    return [
        tokenizer.decode(
            token_ids[start:start + CHUNK_SIZE],
            skip_special_tokens=True,
        )
        for start in window_starts
    ]
# Static block appended after every summary. Kept at module level so the
# literal's lines stay flush-left (they are emitted verbatim) and so the text
# is built once rather than on every call.
STUDY_ADVICE = """
---
### 📚 How to Study This Summary Effectively
Here are some proven techniques to help you learn and remember the material better:
- **Active Recall** — Cover the summary (or close your eyes) and try to explain each main point in your own words. This is one of the most powerful ways to strengthen memory.
- **Spaced Repetition** — Review this summary today, again in 2–3 days, then in one week. Use free apps like Anki or Quizlet to turn key points into flashcards.
- **Feynman Technique** — Pretend you're teaching this topic to a friend (or a 12-year-old). Explaining it simply reveals what you truly understand.
- **Self-Testing** — Create 3–5 questions from the summary (e.g. “What is…?”, “Why does…?”, “Give an example of…”). Answer them without looking.
- **Make Connections** — Draw a quick mind map or diagram linking the main ideas together. This helps see the big picture.
- **Apply It** — If possible, solve related problems, write a short paragraph, or discuss the topic with someone.
Re-reading alone is weak — **active engagement** is what makes information stick!
Good luck with your studies! 🚀
"""


def summarize_long_text(text: str) -> str:
    """Summarize text of any length and append the study-advice section.

    The text is split with ``chunk_text``, each chunk is summarized
    independently (``max_length=150``, ``min_length=40``, greedy decoding),
    the partial summaries are joined with spaces and passed through
    ``clean_text``, and ``STUDY_ADVICE`` is appended.

    Args:
        text: The document to summarize. ``None``, empty, or
            whitespace-only input is treated as "nothing to do".

    Returns:
        ``"No text provided."`` for missing/blank input; otherwise the
        cleaned summary followed by the study-advice block.
    """
    if not text or not text.strip():
        return "No text provided."

    # One summarizer call per chunk; each call returns a list whose first
    # item carries the generated text under "summary_text".
    partial_summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=40,
            do_sample=False,
        )[0]["summary_text"]
        for chunk in chunk_text(text)
    ]
    return clean_text(" ".join(partial_summaries)) + STUDY_ADVICE
def read_pdf(file) -> str:
    """Return the text of every page of a PDF, joined by single spaces.

    ``file`` is handed straight to ``PdfReader``. A page whose
    ``extract_text()`` result is falsy contributes an empty string.

    Any exception raised while opening or reading is caught and reported as
    the string ``"PDF read error: <exception>"`` instead of propagating, so
    callers always receive a ``str``.
    """
    try:
        page_texts = []
        for page in PdfReader(file).pages:
            extracted = page.extract_text()
            page_texts.append(extracted if extracted else "")
        return " ".join(page_texts)
    except Exception as e:
        return f"PDF read error: {e}"
# =========================
# Main handler
# =========================
def process_input(text, file):
    """Handle a click on the summarize button.

    Args:
        text: Contents of the text box.
        file: The uploaded PDF, or ``None`` when nothing was uploaded. When
            present it takes precedence over ``text``.

    Returns:
        The string shown in the output box: the summary plus study advice,
        or the PDF error message when the upload could not be read.
    """
    if file is not None:
        text = read_pdf(file)
        # read_pdf reports failures as a "PDF read error: ..." string rather
        # than raising. Show that message to the user as-is; feeding it to
        # the summarizer would produce a meaningless "summary" of the error.
        if text.startswith("PDF read error:"):
            return text
    return summarize_long_text(text)
# =========================
# Gradio UI
# =========================
# Page layout: heading, feature list, the two optional inputs (pasted text
# and PDF upload), the output box, and the button that runs process_input.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Long Text Summarizer (Free-Tier Safe)")
    gr.Markdown(
        "• Handles **thousands of words**\n"
        "• Supports **PDF upload**\n"
        "• Optimized for **CPU / free tier**\n"
        "• Includes **study tips** to help you learn better"
    )

    text_input = gr.Textbox(
        lines=15,
        label="Paste text (optional)",
        placeholder="Paste your lecture notes, article, or book chapter here...",
    )
    file_input = gr.File(
        label="Upload PDF (optional)",
        file_types=[".pdf"],
    )
    output = gr.Textbox(
        lines=14,
        label="Summary + Study Advice",
        placeholder="Your summary and learning tips will appear here...",
    )

    summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")
    # Both inputs are always passed; process_input decides which one to use.
    summarize_btn.click(
        fn=process_input,
        inputs=[text_input, file_input],
        outputs=output,
    )

# Start the web server once the layout is fully defined.
demo.launch()