Spaces:

Azidan
/

textSum

Running

App Files Files Community

Azidan committed 9 days ago

Commit

8c3fb35

verified ·

1 Parent(s): d8547ca

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -114

app.py CHANGED Viewed

@@ -2,8 +2,6 @@ import gradio as gr
 import re
 from transformers import pipeline, AutoTokenizer
 from PyPDF2 import PdfReader
-from collections import Counter
-import string
 # =========================
 # Model setup (CPU-safe)
@@ -17,57 +15,14 @@ summarizer = pipeline(
     device=-1  # CPU only
 )
-CHUNK_SIZE = 900  # safe margin
-# =========================
-# Subject-specific tip triggers (expandable)
-# =========================
-SUBJECT_TIPS = {
-    "math": [
-        "Practice similar problems step-by-step — repetition builds fluency.",
-        "Focus on understanding formulas and when to apply them.",
-        "Work backwards from answers to see common mistake patterns."
-    ],
-    "physics": [
-        "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
-        "Practice unit conversions and dimensional analysis first.",
-        "Solve numerical examples to connect theory to real numbers."
-    ],
-    "chemistry": [
-        "Draw reaction mechanisms and label reactants/products.",
-        "Make flashcards for periodic trends, solubility rules, or functional groups.",
-        "Balance equations repeatedly until it's automatic."
-    ],
-    "biology": [
-        "Draw and label diagrams (cells, cycles, anatomy) from memory.",
-        "Use mnemonics for processes (e.g., Krebs cycle steps).",
-        "Compare/contrast similar concepts (mitosis vs meiosis)."
-    ],
-    "history": [
-        "Create a timeline or flowchart of events and causes/effects.",
-        "Make cause-effect chains and link them to bigger themes.",
-        "Quiz yourself on dates, people, and turning points."
-    ],
-    "literature": [
-        "Identify themes, symbols, and character development — write short explanations.",
-        "Compare this text to others you've read.",
-        "Practice essay-style answers: thesis + evidence + analysis."
-    ],
-}
-# Add aliases safely AFTER the dictionary is fully defined
-SUBJECT_TIPS["equation"] = SUBJECT_TIPS["math"]
-SUBJECT_TIPS["formula"] = SUBJECT_TIPS["math"]
-# You can easily add more: SUBJECT_TIPS["calculus"] = SUBJECT_TIPS["math"]
-# SUBJECT_TIPS["algebra"] = SUBJECT_TIPS["math"] etc.
-GENERAL_TIPS = [
-    "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
-    "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
-    "Use **Feynman Technique**: Explain it simply as if teaching a younger student.",
-    "Create 3–5 self-test questions from the summary and answer without looking.",
-    "Draw a quick mind map connecting the main ideas."
-]
 # =========================
 # Utilities
@@ -89,7 +44,6 @@ def clean_text(text: str) -> str:
             result.append(s.strip())
     return " ".join(result)
 def chunk_text(text: str):
     """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
@@ -100,59 +54,34 @@ def chunk_text(text: str):
         chunks.append(chunk_text)
     return chunks
-def get_simple_keywords(summary: str, top_n=15):
-    """Very basic keyword extraction: most frequent meaningful words."""
-    text = summary.lower()
-    text = text.translate(str.maketrans("", "", string.punctuation))
-    words = text.split()
-    stop_words = {
-        "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
-        "this", "that", "these", "those", "in", "on", "at", "to", "of",
-        "for", "with", "by", "from", "as", "it", "its", "be", "have", "has"
-    }
-    filtered = [w for w in words if w not in stop_words and len(w) > 2]
-    counter = Counter(filtered)
-    return [word for word, _ in counter.most_common(top_n)]
-def generate_dynamic_advice(summary: str):
-    keywords = get_simple_keywords(summary)
-    detected_tips = []
-    seen_categories = set()
-    for word in keywords:
-        for category, tips in SUBJECT_TIPS.items():
-            if category in word and category not in seen_categories:
-                detected_tips.extend(tips[:2])  # max 2 tips per matched category
-                seen_categories.add(category)
-    # Always include some general advice
-    selected_general = GENERAL_TIPS[:4]
-    all_tips = detected_tips + selected_general
-    if not all_tips:
-        all_tips = GENERAL_TIPS[:4]
-    advice_md = "\n\n---\n\n### 📚 Personalized Study Tips (based on content)\n\n"
-    for tip in all_tips:
-        advice_md += f"- {tip}\n"
-    advice_md += "\n**Pro tip**: Try rewriting the main ideas in your own words after 24 hours — it really helps long-term retention!\n"
     return advice_md
 def summarize_long_text(text: str) -> str:
-    """Summarize arbitrarily long text safely + add study advice."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     chunks = chunk_text(text)
     summaries = []
     for chunk in chunks:
         summary = summarizer(
             chunk,
@@ -161,15 +90,14 @@ def summarize_long_text(text: str) -> str:
             do_sample=False
         )[0]["summary_text"]
         summaries.append(summary)
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
-    # Generate dynamic study advice
-    dynamic_advice = generate_dynamic_advice(cleaned_summary)
-    return cleaned_summary + dynamic_advice
 def read_pdf(file) -> str:
     """Safely extract text from PDF."""
@@ -180,7 +108,6 @@ def read_pdf(file) -> str:
     except Exception as e:
         return f"PDF read error: {e}"
 # =========================
 # Main handler
 # =========================
@@ -189,38 +116,37 @@ def process_input(text, file):
         text = read_pdf(file)
     return summarize_long_text(text)
 # =========================
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Long Text Summarizer + Study Assistant")
     gr.Markdown(
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
-        "• Includes **general + dynamic study tips** based on content keywords"
     )
     text_input = gr.Textbox(
         lines=15,
         label="Paste text (optional)",
         placeholder="Paste lecture notes, textbook chapter, article..."
     )
     file_input = gr.File(
         label="Upload PDF (optional)",
         file_types=[".pdf"]
     )
     output = gr.Textbox(
         lines=16,
-        label="Summary + Personalized Study Advice",
-        placeholder="Summary appears first, followed by tailored learning tips..."
     )
-    summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")
     summarize_btn.click(
         fn=process_input,
         inputs=[text_input, file_input],

 import re
 from transformers import pipeline, AutoTokenizer
 from PyPDF2 import PdfReader
 # =========================
 # Model setup (CPU-safe)
     device=-1  # CPU only
 )
+# New AI advice generator - lightweight text2text model
+# Built once at module level and kept in a global, so the pipeline object is
+# constructed when the module loads rather than on each call that uses it.
+advice_generator = pipeline(
+    "text2text-generation",
+    model="google/flan-t5-small",
+    device=-1  # CPU only
+)
+# NOTE(review): the code that reads CHUNK_SIZE is outside the visible hunks;
+# presumably the chunking helper - confirm before changing the value.
+CHUNK_SIZE = 900  # safe margin for summarizer
 # =========================
 # Utilities
             result.append(s.strip())
     return " ".join(result)
 def chunk_text(text: str):
     """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
         chunks.append(chunk_text)
     return chunks
+def generate_ai_advice(summary: str) -> str:
+    """Generate study tips from a summary and format them as markdown bullets.
+
+    The summary is cut to its first 800 characters, embedded in an
+    instruction prompt, and passed to the module-level ``advice_generator``
+    pipeline. The generated text is split on ". " into candidate tips, and
+    at most five non-empty tips are kept.
+
+    Args:
+        summary: Summary text the tips should be based on.
+
+    Returns:
+        A markdown fragment: a separator and heading, one "- " bullet per
+        tip, and a closing "Pro tip" line.
+    """
+    # Truncate summary if too long for the small model
+    truncated_summary = summary[:800]  # Safe limit for flan-t5-small
+    prompt = (
+        f"Based on this summary: {truncated_summary}\n"
+        "Generate 5 concise study tips for a student to enhance learning and retention."
+    )
+    generated = advice_generator(
+        prompt, max_length=200, num_return_sequences=1
+    )[0]["generated_text"]
+    # Simple split assuming sentence-based output
+    tips = []
+    for fragment in generated.split(". "):
+        tip = fragment.strip()
+        # Drop blanks and stray-period fragments BEFORE applying the limit,
+        # so empty pieces do not use up one of the five slots.
+        if not tip.strip(". "):
+            continue
+        # The split keeps the final sentence's own punctuation, so add a
+        # period only when one is missing (avoids bullets ending in "..").
+        if tip[-1] not in ".!?":
+            tip += "."
+        tips.append(tip)
+    # Format as markdown bullets, limited to 5
+    bullets = "".join(f"- {tip}\n" for tip in tips[:5])
+    advice_md = (
+        "\n\n---\n\n### 📚 AI-Generated Study Tips\n\n"
+        + bullets
+        + "\n**Pro tip**: Apply these tips consistently for better results!"
+    )
     return advice_md
 def summarize_long_text(text: str) -> str:
+    """Summarize arbitrarily long text safely + add AI study advice."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     chunks = chunk_text(text)
     summaries = []
     for chunk in chunks:
         summary = summarizer(
             chunk,
             do_sample=False
         )[0]["summary_text"]
         summaries.append(summary)
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
+    # Generate AI advice based on the summary
+    ai_advice = generate_ai_advice(cleaned_summary)
+    return cleaned_summary + ai_advice
 def read_pdf(file) -> str:
     """Safely extract text from PDF."""
     except Exception as e:
         return f"PDF read error: {e}"
 # =========================
 # Main handler
 # =========================
         text = read_pdf(file)
     return summarize_long_text(text)
 # =========================
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Long Text Summarizer + AI Study Assistant")
     gr.Markdown(
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
+        "• Includes **AI-generated study tips** based on the summary content"
     )
     text_input = gr.Textbox(
         lines=15,
         label="Paste text (optional)",
         placeholder="Paste lecture notes, textbook chapter, article..."
     )
     file_input = gr.File(
         label="Upload PDF (optional)",
         file_types=[".pdf"]
     )
     output = gr.Textbox(
         lines=16,
+        label="Summary + AI Study Advice",
+        placeholder="Summary appears first, followed by AI-generated learning tips..."
     )
+    summarize_btn = gr.Button("Summarize & Get AI Study Tips", variant="primary")
     summarize_btn.click(
         fn=process_input,
         inputs=[text_input, file_input],