Azidan committed on
Commit
6a4c4d0
·
verified ·
1 Parent(s): 2f00a52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -31
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
 
 
5
 
6
  # =========================
7
  # Model setup (CPU-safe)
@@ -17,11 +19,57 @@ summarizer = pipeline(
17
 
18
  CHUNK_SIZE = 900 # safe margin
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # =========================
21
  # Utilities
22
  # =========================
23
  def clean_text(text: str) -> str:
24
- """Fix quotes, spacing, repetition, and broken punctuation."""
25
  text = text.replace("β€˜", "'").replace("’", "'")
26
  text = text.replace("β€œ", '"').replace("”", '"')
27
  text = re.sub(r"[.]{2,}", ".", text)
@@ -39,7 +87,6 @@ def clean_text(text: str) -> str:
39
 
40
 
41
  def chunk_text(text: str):
42
- """Token-aware chunking to avoid model overflow."""
43
  tokens = tokenizer.encode(text, add_special_tokens=False)
44
  chunks = []
45
  for i in range(0, len(tokens), CHUNK_SIZE):
@@ -49,8 +96,47 @@ def chunk_text(text: str):
49
  return chunks
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def summarize_long_text(text: str) -> str:
53
- """Summarize arbitrarily long text safely."""
54
  if not text or len(text.strip()) == 0:
55
  return "No text provided."
56
 
@@ -69,32 +155,13 @@ def summarize_long_text(text: str) -> str:
69
  merged = " ".join(summaries)
70
  cleaned_summary = clean_text(merged)
71
 
72
- # Add study advice section
73
- study_advice = """
74
-
75
- ---
76
-
77
- ### πŸ“š How to Study This Summary Effectively
78
-
79
- Here are some proven techniques to help you learn and remember the material better:
80
-
81
- - **Active Recall** β€” Cover the summary (or close your eyes) and try to explain each main point in your own words. This is one of the most powerful ways to strengthen memory.
82
- - **Spaced Repetition** β€” Review this summary today, again in 2–3 days, then in one week. Use free apps like Anki or Quizlet to turn key points into flashcards.
83
- - **Feynman Technique** β€” Pretend you're teaching this topic to a friend (or a 12-year-old). Explaining it simply reveals what you truly understand.
84
- - **Self-Testing** β€” Create 3–5 questions from the summary (e.g. β€œWhat is…?”, β€œWhy does…?”, β€œGive an example of…”). Answer them without looking.
85
- - **Make Connections** β€” Draw a quick mind map or diagram linking the main ideas together. This helps see the big picture.
86
- - **Apply It** β€” If possible, solve related problems, write a short paragraph, or discuss the topic with someone.
87
-
88
- Re-reading alone is weak β€” **active engagement** is what makes information stick!
89
-
90
- Good luck with your studies! πŸš€
91
- """
92
 
93
- return cleaned_summary + study_advice
94
 
95
 
96
  def read_pdf(file) -> str:
97
- """Safely extract text from PDF."""
98
  try:
99
  reader = PdfReader(file)
100
  pages = [page.extract_text() or "" for page in reader.pages]
@@ -116,18 +183,18 @@ def process_input(text, file):
116
  # Gradio UI
117
  # =========================
118
  with gr.Blocks() as demo:
119
- gr.Markdown("# πŸ“„ Long Text Summarizer (Free-Tier Safe)")
120
  gr.Markdown(
121
  "β€’ Handles **thousands of words**\n"
122
  "β€’ Supports **PDF upload**\n"
123
  "β€’ Optimized for **CPU / free tier**\n"
124
- "β€’ Includes **study tips** to help you learn better"
125
  )
126
 
127
  text_input = gr.Textbox(
128
  lines=15,
129
  label="Paste text (optional)",
130
- placeholder="Paste your lecture notes, article, or book chapter here..."
131
  )
132
 
133
  file_input = gr.File(
@@ -136,9 +203,9 @@ with gr.Blocks() as demo:
136
  )
137
 
138
  output = gr.Textbox(
139
- lines=14,
140
- label="Summary + Study Advice",
141
- placeholder="Your summary and learning tips will appear here..."
142
  )
143
 
144
  summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")
 
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
5
+ from collections import Counter
6
+ import string
7
 
8
  # =========================
9
  # Model setup (CPU-safe)
 
19
 
20
  CHUNK_SIZE = 900 # safe margin
21
 
22
# =========================
# Subject-specific tip triggers (expandable)
# =========================
# Each key is a lowercase trigger word; each value is the list of study tips
# associated with that subject.

# The math tips are defined on their own first: a dict literal cannot look
# itself up while it is still being built (the name is only bound after the
# whole literal has been evaluated), so the alias keys below must refer to a
# name that already exists.
_MATH_TIPS = [
    "Practice similar problems step-by-step — repetition builds fluency.",
    "Focus on understanding formulas and when to apply them.",
    "Work backwards from answers to see common mistake patterns.",
]

SUBJECT_TIPS = {
    "math": _MATH_TIPS,
    "equation": _MATH_TIPS,  # alias of "math"
    "formula": _MATH_TIPS,  # alias of "math"
    "physics": [
        "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
        "Practice unit conversions and dimensional analysis first.",
        "Solve numerical examples to connect theory to real numbers.",
    ],
    "chemistry": [
        "Draw reaction mechanisms and label reactants/products.",
        "Make flashcards for periodic trends, solubility rules, or functional groups.",
        "Balance equations repeatedly until it's automatic.",
    ],
    "biology": [
        "Draw and label diagrams (cells, cycles, anatomy) from memory.",
        "Use mnemonics for processes (e.g., Krebs cycle steps).",
        "Compare/contrast similar concepts (mitosis vs meiosis).",
    ],
    "history": [
        "Create a timeline or flowchart of events and causes/effects.",
        "Make cause-effect chains and link them to bigger themes.",
        "Quiz yourself on dates, people, and turning points.",
    ],
    "literature": [
        "Identify themes, symbols, and character development — write short explanations.",
        "Compare this text to others you've read.",
        "Practice essay-style answers: thesis + evidence + analysis.",
    ],
    # Add more categories as needed: economics, programming, law, etc.
}

# Subject-independent study techniques.
GENERAL_TIPS = [
    "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
    "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
    "Use **Feynman Technique**: Explain it simply as if teaching a younger student.",
    "Create 3–5 self-test questions from the summary and answer without looking.",
    "Draw a quick mind map connecting the main ideas.",
]
68
+
69
  # =========================
70
  # Utilities
71
  # =========================
72
  def clean_text(text: str) -> str:
 
73
  text = text.replace("β€˜", "'").replace("’", "'")
74
  text = text.replace("β€œ", '"').replace("”", '"')
75
  text = re.sub(r"[.]{2,}", ".", text)
 
87
 
88
 
89
  def chunk_text(text: str):
 
90
  tokens = tokenizer.encode(text, add_special_tokens=False)
91
  chunks = []
92
  for i in range(0, len(tokens), CHUNK_SIZE):
 
96
  return chunks
97
 
98
 
99
# Words too common to say anything about the subject of a text.
_STOP_WORDS = frozenset({
    "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
    "this", "that", "these", "those", "in", "on", "at", "to", "of", "for",
    "with", "by", "from", "as", "it", "its",
})

# Apostrophes are deleted so contractions stay one token ("it's" -> "its"),
# while every other punctuation mark becomes a space so that joined terms
# such as "physics/chemistry" or "cause-effect" split into separate words
# instead of fusing into one meaningless token.
_APOSTROPHES = "'\u2019"
_SEPARATORS = (
    string.punctuation.replace("'", "")
    + "\u2013\u2014\u2018\u201c\u201d\u2026"  # en/em dash, curly quotes, ellipsis
)
_PUNCT_TABLE = str.maketrans(_SEPARATORS, " " * len(_SEPARATORS), _APOSTROPHES)


def get_simple_keywords(summary: str, top_n: int = 15) -> list:
    """Return the most frequent meaningful words in *summary*.

    Very basic keyword extraction: the text is lower-cased, punctuation is
    stripped, and stop words and words of two characters or fewer are
    discarded before counting.

    Args:
        summary: Text to extract keywords from.
        top_n: Maximum number of keywords to return.

    Returns:
        Up to ``top_n`` words ordered from most to least frequent; words
        with equal counts keep their order of first appearance.
    """
    words = summary.lower().translate(_PUNCT_TABLE).split()
    counts = Counter(w for w in words if len(w) > 2 and w not in _STOP_WORDS)
    return [word for word, _ in counts.most_common(top_n)]
108
+
109
+
110
def generate_dynamic_advice(summary: str) -> str:
    """Build the Markdown study-tips section that is appended to a summary.

    Keywords extracted from *summary* are compared against the trigger words
    in ``SUBJECT_TIPS``; each matching category contributes up to two tips,
    followed by the first four entries of ``GENERAL_TIPS``.

    Args:
        summary: The cleaned summary text to tailor the advice to.

    Returns:
        A Markdown string: a horizontal rule, a heading, one bullet per tip
        and a closing "Pro tip" line.
    """
    keywords = get_simple_keywords(summary)

    tips = []
    seen_categories = set()
    for word in keywords:
        for category, category_tips in SUBJECT_TIPS.items():
            # Substring match so derived forms of a trigger ("formulas",
            # "biochemistry") still select the category.
            if category in seen_categories or category not in word:
                continue
            seen_categories.add(category)
            tips.extend(category_tips[:2])  # take up to 2 per category

    # The general tips are always included, so the list is never empty.
    tips.extend(GENERAL_TIPS[:4])

    # Several trigger words can point at the same tips (e.g. aliases of one
    # subject); dict.fromkeys drops the repeats while keeping first-seen order.
    unique_tips = dict.fromkeys(tips)

    bullets = "".join(f"- {tip}\n" for tip in unique_tips)
    return (
        "\n\n---\n\n### 📚 Personalized Study Tips (based on content)\n\n"
        + bullets
        + "\n**Pro tip**: Rewrite the summary in your own words after 24 hours — this locks in understanding!\n"
    )
137
+
138
+
139
  def summarize_long_text(text: str) -> str:
 
140
  if not text or len(text.strip()) == 0:
141
  return "No text provided."
142
 
 
155
  merged = " ".join(summaries)
156
  cleaned_summary = clean_text(merged)
157
 
158
+ # Dynamic advice
159
+ dynamic_advice = generate_dynamic_advice(cleaned_summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ return cleaned_summary + dynamic_advice
162
 
163
 
164
  def read_pdf(file) -> str:
 
165
  try:
166
  reader = PdfReader(file)
167
  pages = [page.extract_text() or "" for page in reader.pages]
 
183
  # Gradio UI
184
  # =========================
185
  with gr.Blocks() as demo:
186
+ gr.Markdown("# πŸ“„ Long Text Summarizer + Study Assistant")
187
  gr.Markdown(
188
  "β€’ Handles **thousands of words**\n"
189
  "β€’ Supports **PDF upload**\n"
190
  "β€’ Optimized for **CPU / free tier**\n"
191
+ "β€’ Includes **general + dynamic study tips** tailored to content keywords"
192
  )
193
 
194
  text_input = gr.Textbox(
195
  lines=15,
196
  label="Paste text (optional)",
197
+ placeholder="Paste lecture notes, textbook chapter, article..."
198
  )
199
 
200
  file_input = gr.File(
 
203
  )
204
 
205
  output = gr.Textbox(
206
+ lines=16,
207
+ label="Summary + Personalized Study Advice",
208
+ placeholder="Summary appears first, followed by tailored learning tips..."
209
  )
210
 
211
  summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")