| import gradio as gr |
| import re |
| from transformers import pipeline, AutoTokenizer |
| from PyPDF2 import PdfReader |
|
|
| |
| |
| |
# Checkpoint shared by the tokenizer and the summarization pipeline below.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

# A single tokenizer instance is handed to the pipeline and is also used
# directly by chunk_text() to count and slice tokens.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

summarizer = pipeline(
    task="summarization",
    model=MODEL_NAME,
    tokenizer=tokenizer,
    device=-1,  # -1 selects the CPU
)

# Number of tokens in each slice produced by chunk_text().
# NOTE(review): presumably chosen to stay below the model's input limit - confirm.
CHUNK_SIZE = 900
|
|
| |
| |
| |
# Typographic quotes mapped to their ASCII equivalents.  Spelled as escapes so
# the table cannot be corrupted if this source file is ever re-encoded.
_QUOTE_TABLE = str.maketrans({
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
})

# A sentence ends at '.', '!' or '?' followed by whitespace.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def clean_text(text: str) -> str:
    """Normalize quotes, punctuation and spacing, and drop repeated sentences.

    Steps, in order:

    1. Curly single/double quotes become plain ``'`` and ``"``.
    2. Runs of two or more periods collapse to one period.
    3. Runs of two or more apostrophes collapse to one apostrophe.
    4. Every run of whitespace becomes a single space.
    5. The text is split into sentences; a sentence is kept only the first
       time it appears (comparison is case-insensitive and ignores
       surrounding whitespace).

    Args:
        text: The text to clean.

    Returns:
        The surviving sentences joined by single spaces. An empty or
        whitespace-only input yields an empty string.
    """
    text = text.translate(_QUOTE_TABLE)
    text = re.sub(r"[.]{2,}", ".", text)
    text = re.sub(r"[']{2,}", "'", text)
    text = re.sub(r"\s+", " ", text)

    seen = set()
    unique_sentences = []
    for sentence in _SENTENCE_BOUNDARY.split(text):
        sentence = sentence.strip()
        key = sentence.lower()
        # Skip empty fragments and sentences that were already emitted.
        if key and key not in seen:
            seen.add(key)
            unique_sentences.append(sentence)
    return " ".join(unique_sentences)
|
|
|
|
def chunk_text(text: str):
    """Split *text* into pieces of at most ``CHUNK_SIZE`` tokens each.

    The text is encoded once with the module-level tokenizer (without special
    tokens), the token ids are cut into consecutive ``CHUNK_SIZE``-long
    windows, and every window is decoded back to a string.

    Args:
        text: The text to split.

    Returns:
        A list of decoded chunk strings, in document order. The list is empty
        when the text encodes to no tokens.
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    windows = (
        token_ids[start:start + CHUNK_SIZE]
        for start in range(0, len(token_ids), CHUNK_SIZE)
    )
    return [
        tokenizer.decode(window, skip_special_tokens=True)
        for window in windows
    ]
|
|
|
|
# Markdown appended verbatim to every summary.
# NOTE(review): the two emoji below are stand-ins. The originals were lost to
# encoding damage (each had collapsed to a single Greek letter) - confirm the
# intended glyphs.
_STUDY_ADVICE = """

---

### 📚 How to Study This Summary Effectively

Here are some proven techniques to help you learn and remember the material better:

- **Active Recall** — Cover the summary (or close your eyes) and try to explain each main point in your own words. This is one of the most powerful ways to strengthen memory.
- **Spaced Repetition** — Review this summary today, again in 2–3 days, then in one week. Use free apps like Anki or Quizlet to turn key points into flashcards.
- **Feynman Technique** — Pretend you're teaching this topic to a friend (or a 12-year-old). Explaining it simply reveals what you truly understand.
- **Self-Testing** — Create 3–5 questions from the summary (e.g. “What is…?”, “Why does…?”, “Give an example of…”). Answer them without looking.
- **Make Connections** — Draw a quick mind map or diagram linking the main ideas together. This helps see the big picture.
- **Apply It** — If possible, solve related problems, write a short paragraph, or discuss the topic with someone.

Re-reading alone is weak — **active engagement** is what makes information stick!

Good luck with your studies! 🍀
"""


def summarize_long_text(text: str) -> str:
    """Summarize text of any length and append study advice.

    The text is split with ``chunk_text``, each chunk is summarized
    independently, the partial summaries are joined with spaces and passed
    through ``clean_text``, and the fixed study-advice Markdown is appended.

    Args:
        text: The text to summarize. May be ``None`` or empty.

    Returns:
        ``"No text provided."`` when *text* is ``None``, empty or
        whitespace-only; otherwise the cleaned summary followed by the
        study-advice block.
    """
    if not text or not text.strip():
        return "No text provided."

    summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=40,
            do_sample=False,
            # Decoding and re-encoding a chunk can change its token count;
            # clip at the model limit instead of failing on an over-long input.
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunk_text(text)
    ]

    return clean_text(" ".join(summaries)) + _STUDY_ADVICE
|
|
|
|
def read_pdf(file) -> str:
    """Extract the text of every page of a PDF as one string.

    Args:
        file: Whatever ``PdfReader`` accepts as its source argument.

    Returns:
        The page texts joined by single spaces; a page with no extractable
        text contributes an empty string. This function does not raise on
        failure: any exception is reported in-band as
        ``"PDF read error: <message>"``.
    """
    try:
        document = PdfReader(file)
        return " ".join((pg.extract_text() or "") for pg in document.pages)
    except Exception as exc:
        # Failures are deliberately returned as text rather than raised.
        return f"PDF read error: {exc}"
|
|
|
|
| |
| |
| |
def process_input(text, file):
    """Summarize the uploaded PDF if there is one, otherwise the pasted text.

    Args:
        text: Contents of the text box. Ignored when *file* is given.
        file: The uploaded PDF, or ``None`` when nothing was uploaded.

    Returns:
        The summary with study advice, or the PDF error message when the
        upload could not be read.
    """
    if file is not None:
        text = read_pdf(file)
        # read_pdf reports failures in-band as "PDF read error: ...". Show
        # that message to the user instead of feeding it to the summarizer.
        if text.startswith("PDF read error:"):
            return text
    return summarize_long_text(text)
|
|
|
|
| |
| |
| |
# UI layout: a text box and a PDF upload feed process_input(); its result is
# shown in the output text box.
with gr.Blocks() as demo:
    # NOTE(review): the title emoji is a stand-in. The original was lost to
    # encoding damage - confirm the intended glyph.
    gr.Markdown("# 📚 Long Text Summarizer (Free-Tier Safe)")
    gr.Markdown(
        "• Handles **thousands of words**\n"
        "• Supports **PDF upload**\n"
        "• Optimized for **CPU / free tier**\n"
        "• Includes **study tips** to help you learn better"
    )

    text_input = gr.Textbox(
        lines=15,
        label="Paste text (optional)",
        placeholder="Paste your lecture notes, article, or book chapter here...",
    )

    file_input = gr.File(
        label="Upload PDF (optional)",
        file_types=[".pdf"],
    )

    output = gr.Textbox(
        lines=14,
        label="Summary + Study Advice",
        placeholder="Your summary and learning tips will appear here...",
    )

    summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")

    # An uploaded file takes precedence over pasted text (see process_input).
    summarize_btn.click(
        fn=process_input,
        inputs=[text_input, file_input],
        outputs=output,
    )


demo.launch()