Spaces:

Azidan
/

textSum

Sleeping

App Files Files Community

Azidan committed 12 days ago

Commit

d8547ca

verified ·

1 Parent(s): 6a4c4d0

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -11

app.py CHANGED Viewed

@@ -28,8 +28,6 @@ SUBJECT_TIPS = {
         "Focus on understanding formulas and when to apply them.",
         "Work backwards from answers to see common mistake patterns."
     ],
-    "equation": SUBJECT_TIPS["math"] if "math" not in SUBJECT_TIPS else [],  # alias
-    "formula": SUBJECT_TIPS["math"],
     "physics": [
         "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
         "Practice unit conversions and dimensional analysis first.",
@@ -55,9 +53,14 @@ SUBJECT_TIPS = {
         "Compare this text to others you've read.",
         "Practice essay-style answers: thesis + evidence + analysis."
     ],
-    # Add more categories as needed: economics, programming, law, etc.
 }
 GENERAL_TIPS = [
     "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
     "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
@@ -70,6 +73,7 @@ GENERAL_TIPS = [
 # Utilities
 # =========================
 def clean_text(text: str) -> str:
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
@@ -87,6 +91,7 @@ def clean_text(text: str) -> str:
 def chunk_text(text: str):
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
@@ -97,11 +102,15 @@ def chunk_text(text: str):
 def get_simple_keywords(summary: str, top_n=15):
-    """Very basic keyword extraction: most frequent words (after removing stop/punct)."""
     text = summary.lower()
     text = text.translate(str.maketrans("", "", string.punctuation))
     words = text.split()
-    stop_words = {"the", "a", "an", "and", "or", "but", "is", "are", "was", "were", "this", "that", "these", "those", "in", "on", "at", "to", "of", "for", "with", "by", "from", "as", "it", "its"}
     filtered = [w for w in words if w not in stop_words and len(w) > 2]
     counter = Counter(filtered)
     return [word for word, _ in counter.most_common(top_n)]
@@ -116,11 +125,11 @@ def generate_dynamic_advice(summary: str):
     for word in keywords:
         for category, tips in SUBJECT_TIPS.items():
             if category in word and category not in seen_categories:
-                detected_tips.extend(tips[:2])  # take up to 2 per category
                 seen_categories.add(category)
-    # Always add 3–4 general ones
-    selected_general = GENERAL_TIPS[:4]  # or random.sample if you import random
     all_tips = detected_tips + selected_general
@@ -131,12 +140,13 @@ def generate_dynamic_advice(summary: str):
     for tip in all_tips:
         advice_md += f"- {tip}\n"
-    advice_md += "\n**Pro tip**: Rewrite the summary in your own words after 24 hours — this locks in understanding!\n"
     return advice_md
 def summarize_long_text(text: str) -> str:
     if not text or len(text.strip()) == 0:
         return "No text provided."
@@ -155,13 +165,14 @@ def summarize_long_text(text: str) -> str:
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
-    # Dynamic advice
     dynamic_advice = generate_dynamic_advice(cleaned_summary)
     return cleaned_summary + dynamic_advice
 def read_pdf(file) -> str:
     try:
         reader = PdfReader(file)
         pages = [page.extract_text() or "" for page in reader.pages]
@@ -188,7 +199,7 @@ with gr.Blocks() as demo:
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
-        "• Includes **general + dynamic study tips** tailored to content keywords"
     )
     text_input = gr.Textbox(

         "Focus on understanding formulas and when to apply them.",
         "Work backwards from answers to see common mistake patterns."
     ],
     "physics": [
         "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
         "Practice unit conversions and dimensional analysis first.",
         "Compare this text to others you've read.",
         "Practice essay-style answers: thesis + evidence + analysis."
     ],
 }
+# Add aliases safely AFTER the dictionary is fully defined
+SUBJECT_TIPS["equation"] = SUBJECT_TIPS["math"]
+SUBJECT_TIPS["formula"] = SUBJECT_TIPS["math"]
+# You can easily add more: SUBJECT_TIPS["calculus"] = SUBJECT_TIPS["math"]
+# SUBJECT_TIPS["algebra"] = SUBJECT_TIPS["math"] etc.
 GENERAL_TIPS = [
     "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
     "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
 # Utilities
 # =========================
 def clean_text(text: str) -> str:
+    """Fix quotes, spacing, repetition, and broken punctuation."""
     text = text.replace("‘", "'").replace("’", "'")
     text = text.replace("“", '"').replace("”", '"')
     text = re.sub(r"[.]{2,}", ".", text)
 def chunk_text(text: str):
+    """Token-aware chunking to avoid model overflow."""
     tokens = tokenizer.encode(text, add_special_tokens=False)
     chunks = []
     for i in range(0, len(tokens), CHUNK_SIZE):
 def get_simple_keywords(summary: str, top_n=15):
+    """Very basic keyword extraction: most frequent meaningful words."""
     text = summary.lower()
     text = text.translate(str.maketrans("", "", string.punctuation))
     words = text.split()
+    stop_words = {
+        "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
+        "this", "that", "these", "those", "in", "on", "at", "to", "of",
+        "for", "with", "by", "from", "as", "it", "its", "be", "have", "has"
+    }
     filtered = [w for w in words if w not in stop_words and len(w) > 2]
     counter = Counter(filtered)
     return [word for word, _ in counter.most_common(top_n)]
     for word in keywords:
         for category, tips in SUBJECT_TIPS.items():
             if category in word and category not in seen_categories:
+                detected_tips.extend(tips[:2])  # max 2 tips per matched category
                 seen_categories.add(category)
+    # Always include some general advice
+    selected_general = GENERAL_TIPS[:4]
     all_tips = detected_tips + selected_general
     for tip in all_tips:
         advice_md += f"- {tip}\n"
+    advice_md += "\n**Pro tip**: Try rewriting the main ideas in your own words after 24 hours — it really helps long-term retention!\n"
     return advice_md
 def summarize_long_text(text: str) -> str:
+    """Summarize arbitrarily long text safely + add study advice."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     merged = " ".join(summaries)
     cleaned_summary = clean_text(merged)
+    # Generate dynamic study advice
     dynamic_advice = generate_dynamic_advice(cleaned_summary)
     return cleaned_summary + dynamic_advice
 def read_pdf(file) -> str:
+    """Safely extract text from PDF."""
     try:
         reader = PdfReader(file)
         pages = [page.extract_text() or "" for page in reader.pages]
         "• Handles **thousands of words**\n"
         "• Supports **PDF upload**\n"
         "• Optimized for **CPU / free tier**\n"
+        "• Includes **general + dynamic study tips** based on content keywords"
     )
     text_input = gr.Textbox(