Spaces:

Azidan
/

textSum

Sleeping

App Files Files Community

Azidan committed 27 days ago

Commit

6a4c4d0

verified ·

1 Parent(s): 2f00a52

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -31

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import gradio as gr
 import re
 from transformers import pipeline, AutoTokenizer
 from PyPDF2 import PdfReader
 # =========================
 # Model setup (CPU-safe)
@@ -17,11 +19,57 @@ summarizer = pipeline(
 CHUNK_SIZE = 900  # safe margin
 # =========================
 # Utilities
 # =========================
 def clean_text(text: str) -> str:
-    """Fix quotes, spacing, repetition, and broken punctuation."""
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
@@ -39,7 +87,6 @@ def clean_text(text: str) -> str:
 def chunk_text(text: str):
-    """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
@@ -49,8 +96,47 @@ def chunk_text(text: str):
     return chunks
 def summarize_long_text(text: str) -> str:
-    """Summarize arbitrarily long text safely."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
@@ -69,32 +155,13 @@ def summarize_long_text(text: str) -> str:
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
-    # Add study advice section
-    study_advice = """
----
-### 📚 How to Study This Summary Effectively
-Here are some proven techniques to help you learn and remember the material better:
-- **Active Recall** — Cover the summary (or close your eyes) and try to explain each main point in your own words. This is one of the most powerful ways to strengthen memory.
-- **Spaced Repetition** — Review this summary today, again in 2–3 days, then in one week. Use free apps like Anki or Quizlet to turn key points into flashcards.
-- **Feynman Technique** — Pretend you're teaching this topic to a friend (or a 12-year-old). Explaining it simply reveals what you truly understand.
-- **Self-Testing** — Create 3–5 questions from the summary (e.g. “What is…?”, “Why does…?”, “Give an example of…”). Answer them without looking.
-- **Make Connections** — Draw a quick mind map or diagram linking the main ideas together. This helps see the big picture.
-- **Apply It** — If possible, solve related problems, write a short paragraph, or discuss the topic with someone.
-Re-reading alone is weak — **active engagement** is what makes information stick!
-Good luck with your studies! 🚀
-"""
-    return cleaned_summary + study_advice
 def read_pdf(file) -> str:
-    """Safely extract text from PDF."""
     try:
         reader = PdfReader(file)
         pages = [page.extract_text() or "" for page in reader.pages]
@@ -116,18 +183,18 @@ def process_input(text, file):
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Long Text Summarizer (Free-Tier Safe)")
     gr.Markdown(
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
-        "• Includes **study tips** to help you learn better"
     )
     text_input = gr.Textbox(
         lines=15,
         label="Paste text (optional)",
-        placeholder="Paste your lecture notes, article, or book chapter here..."
     )
     file_input = gr.File(
@@ -136,9 +203,9 @@ with gr.Blocks() as demo:
     )
     output = gr.Textbox(
-        lines=14,
-        label="Summary + Study Advice",
-        placeholder="Your summary and learning tips will appear here..."
     )
     summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")

 import re
 from transformers import pipeline, AutoTokenizer
 from PyPDF2 import PdfReader
+from collections import Counter
+import string
 # =========================
 # Model setup (CPU-safe)
 CHUNK_SIZE = 900  # safe margin
# =========================
# Subject-specific tip triggers (expandable)
# =========================
# The math tips are bound to their own name first: a dict literal cannot refer
# to itself while it is still being built (that raises NameError at import),
# so the "equation" and "formula" aliases must point at a pre-existing list.
_MATH_TIPS = [
    "Practice similar problems step-by-step — repetition builds fluency.",
    "Focus on understanding formulas and when to apply them.",
    "Work backwards from answers to see common mistake patterns.",
]

# Maps a lowercase trigger word to the study tips offered for that subject.
SUBJECT_TIPS = {
    "math": _MATH_TIPS,
    "equation": _MATH_TIPS,  # alias of "math"
    "formula": _MATH_TIPS,  # alias of "math"
    "physics": [
        "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
        "Practice unit conversions and dimensional analysis first.",
        "Solve numerical examples to connect theory to real numbers.",
    ],
    "chemistry": [
        "Draw reaction mechanisms and label reactants/products.",
        "Make flashcards for periodic trends, solubility rules, or functional groups.",
        "Balance equations repeatedly until it's automatic.",
    ],
    "biology": [
        "Draw and label diagrams (cells, cycles, anatomy) from memory.",
        "Use mnemonics for processes (e.g., Krebs cycle steps).",
        "Compare/contrast similar concepts (mitosis vs meiosis).",
    ],
    "history": [
        "Create a timeline or flowchart of events and causes/effects.",
        "Make cause-effect chains and link them to bigger themes.",
        "Quiz yourself on dates, people, and turning points.",
    ],
    "literature": [
        "Identify themes, symbols, and character development — write short explanations.",
        "Compare this text to others you've read.",
        "Practice essay-style answers: thesis + evidence + analysis.",
    ],
    # Add more categories as needed: economics, programming, law, etc.
}

# Subject-independent tips.
GENERAL_TIPS = [
    "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
    "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
    "Use **Feynman Technique**: Explain it simply as if teaching a younger student.",
    "Create 3–5 self-test questions from the summary and answer without looking.",
    "Draw a quick mind map connecting the main ideas.",
]
 # =========================
 # Utilities
 # =========================
 def clean_text(text: str) -> str:
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
 def chunk_text(text: str):
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
     return chunks
def get_simple_keywords(summary: str, top_n: int = 15) -> list:
    """Return the most frequent non-trivial words in *summary*.

    Very basic keyword extraction: the text is lowercased, punctuation is
    stripped, stop words and words of two characters or fewer are discarded,
    and the remaining words are ranked by frequency.

    Args:
        summary: Text to extract keywords from.
        top_n: Maximum number of keywords to return.

    Returns:
        Up to ``top_n`` lowercase words, most frequent first. Words with equal
        counts keep the order in which they first appear.
    """
    # Apostrophes are deleted so contractions and possessives stay one word
    # ("it's" -> "its"). Every other punctuation mark becomes a space so joined
    # terms such as "mitosis/meiosis" or "step-by-step" are counted as their
    # component words rather than as one fused, meaningless token.
    punctuation_table = str.maketrans(
        {char: (None if char == "'" else " ") for char in string.punctuation}
    )
    words = summary.lower().translate(punctuation_table).split()

    stop_words = {
        "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
        "this", "that", "these", "those", "in", "on", "at", "to", "of",
        "for", "with", "by", "from", "as", "it", "its",
    }
    counts = Counter(
        word for word in words if len(word) > 2 and word not in stop_words
    )
    return [word for word, _ in counts.most_common(top_n)]
def generate_dynamic_advice(summary: str) -> str:
    """Build the Markdown study-advice section for *summary*.

    Keywords extracted from the summary are matched against the trigger words
    in ``SUBJECT_TIPS``; each trigger that occurs as a substring of a keyword
    contributes its first two tips, once. The first four ``GENERAL_TIPS`` are
    always appended after the subject-specific ones.

    Args:
        summary: The summary text to tailor the advice to.

    Returns:
        A Markdown string: a horizontal rule, a heading, one bullet per tip and
        a closing "Pro tip" line.
    """
    keywords = get_simple_keywords(summary)

    tips = []
    seen_categories = set()
    for word in keywords:
        for category, category_tips in SUBJECT_TIPS.items():
            if category in seen_categories or category not in word:
                continue
            seen_categories.add(category)
            tips.extend(category_tips[:2])  # take up to 2 per category

    # Always add the general tips (use random.sample here for variety).
    tips.extend(GENERAL_TIPS[:4])

    # Different triggers may share the same tips (alias categories), so drop
    # repeats while keeping the first occurrence's position.
    unique_tips = list(dict.fromkeys(tips))

    header = "\n\n---\n\n### 📚 Personalized Study Tips (based on content)\n\n"
    bullets = "".join(f"- {tip}\n" for tip in unique_tips)
    footer = (
        "\n**Pro tip**: Rewrite the summary in your own words after 24 hours"
        " — this locks in understanding!\n"
    )
    return header + bullets + footer
 def summarize_long_text(text: str) -> str:
     if not text or len(text.strip()) == 0:
         return "No text provided."
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
+    # Dynamic advice
+    dynamic_advice = generate_dynamic_advice(cleaned_summary)
+    return cleaned_summary + dynamic_advice
 def read_pdf(file) -> str:
     try:
         reader = PdfReader(file)
         pages = [page.extract_text() or "" for page in reader.pages]
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Long Text Summarizer + Study Assistant")
     gr.Markdown(
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
+        "• Includes **general + dynamic study tips** tailored to content keywords"
     )
     text_input = gr.Textbox(
         lines=15,
         label="Paste text (optional)",
+        placeholder="Paste lecture notes, textbook chapter, article..."
     )
     file_input = gr.File(
     )
     output = gr.Textbox(
+        lines=16,
+        label="Summary + Personalized Study Advice",
+        placeholder="Summary appears first, followed by tailored learning tips..."
     )
     summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")