Azidan committed on
Commit
8c3fb35
·
verified ·
1 Parent(s): d8547ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -114
app.py CHANGED
@@ -2,8 +2,6 @@ import gradio as gr
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
5
- from collections import Counter
6
- import string
7
 
8
  # =========================
9
  # Model setup (CPU-safe)
@@ -17,57 +15,14 @@ summarizer = pipeline(
17
  device=-1 # CPU only
18
  )
19
 
20
- CHUNK_SIZE = 900 # safe margin
21
-
22
- # =========================
23
- # Subject-specific tip triggers (expandable)
24
- # =========================
25
- SUBJECT_TIPS = {
26
- "math": [
27
- "Practice similar problems step-by-step — repetition builds fluency.",
28
- "Focus on understanding formulas and when to apply them.",
29
- "Work backwards from answers to see common mistake patterns."
30
- ],
31
- "physics": [
32
- "Draw free-body diagrams or sketch scenarios to visualize forces/concepts.",
33
- "Practice unit conversions and dimensional analysis first.",
34
- "Solve numerical examples to connect theory to real numbers."
35
- ],
36
- "chemistry": [
37
- "Draw reaction mechanisms and label reactants/products.",
38
- "Make flashcards for periodic trends, solubility rules, or functional groups.",
39
- "Balance equations repeatedly until it's automatic."
40
- ],
41
- "biology": [
42
- "Draw and label diagrams (cells, cycles, anatomy) from memory.",
43
- "Use mnemonics for processes (e.g., Krebs cycle steps).",
44
- "Compare/contrast similar concepts (mitosis vs meiosis)."
45
- ],
46
- "history": [
47
- "Create a timeline or flowchart of events and causes/effects.",
48
- "Make cause-effect chains and link them to bigger themes.",
49
- "Quiz yourself on dates, people, and turning points."
50
- ],
51
- "literature": [
52
- "Identify themes, symbols, and character development — write short explanations.",
53
- "Compare this text to others you've read.",
54
- "Practice essay-style answers: thesis + evidence + analysis."
55
- ],
56
- }
57
-
58
- # Add aliases safely AFTER the dictionary is fully defined
59
- SUBJECT_TIPS["equation"] = SUBJECT_TIPS["math"]
60
- SUBJECT_TIPS["formula"] = SUBJECT_TIPS["math"]
61
- # You can easily add more: SUBJECT_TIPS["calculus"] = SUBJECT_TIPS["math"]
62
- # SUBJECT_TIPS["algebra"] = SUBJECT_TIPS["math"] etc.
63
 
64
- GENERAL_TIPS = [
65
- "Use **Active Recall**: Cover the summary and explain key points out loud or in writing.",
66
- "Apply **Spaced Repetition**: Review today, in 2–3 days, then in a week (try Anki).",
67
- "Use **Feynman Technique**: Explain it simply as if teaching a younger student.",
68
- "Create 3–5 self-test questions from the summary and answer without looking.",
69
- "Draw a quick mind map connecting the main ideas."
70
- ]
71
 
72
  # =========================
73
  # Utilities
@@ -89,7 +44,6 @@ def clean_text(text: str) -> str:
89
  result.append(s.strip())
90
  return " ".join(result)
91
 
92
-
93
  def chunk_text(text: str):
94
  """Token-aware chunking to avoid model overflow."""
95
  tokens = tokenizer.encode(text, add_special_tokens=False)
@@ -100,59 +54,34 @@ def chunk_text(text: str):
100
  chunks.append(chunk_text)
101
  return chunks
102
 
103
-
104
- def get_simple_keywords(summary: str, top_n=15):
105
- """Very basic keyword extraction: most frequent meaningful words."""
106
- text = summary.lower()
107
- text = text.translate(str.maketrans("", "", string.punctuation))
108
- words = text.split()
109
- stop_words = {
110
- "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
111
- "this", "that", "these", "those", "in", "on", "at", "to", "of",
112
- "for", "with", "by", "from", "as", "it", "its", "be", "have", "has"
113
- }
114
- filtered = [w for w in words if w not in stop_words and len(w) > 2]
115
- counter = Counter(filtered)
116
- return [word for word, _ in counter.most_common(top_n)]
117
-
118
-
119
- def generate_dynamic_advice(summary: str):
120
- keywords = get_simple_keywords(summary)
121
-
122
- detected_tips = []
123
- seen_categories = set()
124
-
125
- for word in keywords:
126
- for category, tips in SUBJECT_TIPS.items():
127
- if category in word and category not in seen_categories:
128
- detected_tips.extend(tips[:2]) # max 2 tips per matched category
129
- seen_categories.add(category)
130
-
131
- # Always include some general advice
132
- selected_general = GENERAL_TIPS[:4]
133
-
134
- all_tips = detected_tips + selected_general
135
-
136
- if not all_tips:
137
- all_tips = GENERAL_TIPS[:4]
138
-
139
- advice_md = "\n\n---\n\n### 📚 Personalized Study Tips (based on content)\n\n"
140
- for tip in all_tips:
141
- advice_md += f"- {tip}\n"
142
 
143
- advice_md += "\n**Pro tip**: Try rewriting the main ideas in your own words after 24 hours — it really helps long-term retention!\n"
 
 
 
 
 
 
144
 
145
  return advice_md
146
 
147
-
148
  def summarize_long_text(text: str) -> str:
149
- """Summarize arbitrarily long text safely + add study advice."""
150
  if not text or len(text.strip()) == 0:
151
  return "No text provided."
152
-
153
  chunks = chunk_text(text)
154
  summaries = []
155
-
156
  for chunk in chunks:
157
  summary = summarizer(
158
  chunk,
@@ -161,15 +90,14 @@ def summarize_long_text(text: str) -> str:
161
  do_sample=False
162
  )[0]["summary_text"]
163
  summaries.append(summary)
164
-
165
  merged = " ".join(summaries)
166
  cleaned_summary = clean_text(merged)
167
 
168
- # Generate dynamic study advice
169
- dynamic_advice = generate_dynamic_advice(cleaned_summary)
170
 
171
- return cleaned_summary + dynamic_advice
172
-
173
 
174
  def read_pdf(file) -> str:
175
  """Safely extract text from PDF."""
@@ -180,7 +108,6 @@ def read_pdf(file) -> str:
180
  except Exception as e:
181
  return f"PDF read error: {e}"
182
 
183
-
184
  # =========================
185
  # Main handler
186
  # =========================
@@ -189,38 +116,37 @@ def process_input(text, file):
189
  text = read_pdf(file)
190
  return summarize_long_text(text)
191
 
192
-
193
  # =========================
194
  # Gradio UI
195
  # =========================
196
  with gr.Blocks() as demo:
197
- gr.Markdown("# 📄 Long Text Summarizer + Study Assistant")
198
  gr.Markdown(
199
  "• Handles **thousands of words**\n"
200
  "• Supports **PDF upload**\n"
201
  "• Optimized for **CPU / free tier**\n"
202
- "• Includes **general + dynamic study tips** based on content keywords"
203
  )
204
-
205
  text_input = gr.Textbox(
206
  lines=15,
207
  label="Paste text (optional)",
208
  placeholder="Paste lecture notes, textbook chapter, article..."
209
  )
210
-
211
  file_input = gr.File(
212
  label="Upload PDF (optional)",
213
  file_types=[".pdf"]
214
  )
215
-
216
  output = gr.Textbox(
217
  lines=16,
218
- label="Summary + Personalized Study Advice",
219
- placeholder="Summary appears first, followed by tailored learning tips..."
220
  )
221
-
222
- summarize_btn = gr.Button("Summarize & Get Study Tips", variant="primary")
223
-
224
  summarize_btn.click(
225
  fn=process_input,
226
  inputs=[text_input, file_input],
 
2
  import re
3
  from transformers import pipeline, AutoTokenizer
4
  from PyPDF2 import PdfReader
 
 
5
 
6
  # =========================
7
  # Model setup (CPU-safe)
 
15
  device=-1 # CPU only
16
  )
17
 
18
+ # New AI advice generator - lightweight text2text model
19
+ advice_generator = pipeline(
20
+ "text2text-generation",
21
+ model="google/flan-t5-small",
22
+ device=-1 # CPU only
23
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ CHUNK_SIZE = 900 # safe margin for summarizer
 
 
 
 
 
 
26
 
27
  # =========================
28
  # Utilities
 
44
  result.append(s.strip())
45
  return " ".join(result)
46
 
 
47
  def chunk_text(text: str):
48
  """Token-aware chunking to avoid model overflow."""
49
  tokens = tokenizer.encode(text, add_special_tokens=False)
 
54
  chunks.append(chunk_text)
55
  return chunks
56
 
57
+ def generate_ai_advice(summary: str) -> str:
58
+ """Use AI to generate personalized study advice based on the summary."""
59
+ # Truncate summary if too long for the small model
60
+ truncated_summary = summary[:800] # Safe limit for flan-t5-small
61
+ prompt = (
62
+ f"Based on this summary: {truncated_summary}\n"
63
+ "Generate 5 concise study tips for a student to enhance learning and retention."
64
+ )
65
+ generated = advice_generator(prompt, max_length=200, num_return_sequences=1)[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ # Format as markdown bullets
68
+ tips = generated.split(". ") # Simple split assuming sentence-based output
69
+ advice_md = "\n\n---\n\n### 📚 AI-Generated Study Tips\n\n"
70
+ for tip in tips[:5]: # Limit to 5
71
+ if tip.strip():
72
+ advice_md += f"- {tip.strip()}.\n"
73
+ advice_md += "\n**Pro tip**: Apply these tips consistently for better results!"
74
 
75
  return advice_md
76
 
 
77
  def summarize_long_text(text: str) -> str:
78
+ """Summarize arbitrarily long text safely + add AI study advice."""
79
  if not text or len(text.strip()) == 0:
80
  return "No text provided."
81
+
82
  chunks = chunk_text(text)
83
  summaries = []
84
+
85
  for chunk in chunks:
86
  summary = summarizer(
87
  chunk,
 
90
  do_sample=False
91
  )[0]["summary_text"]
92
  summaries.append(summary)
93
+
94
  merged = " ".join(summaries)
95
  cleaned_summary = clean_text(merged)
96
 
97
+ # Generate AI advice based on the summary
98
+ ai_advice = generate_ai_advice(cleaned_summary)
99
 
100
+ return cleaned_summary + ai_advice
 
101
 
102
  def read_pdf(file) -> str:
103
  """Safely extract text from PDF."""
 
108
  except Exception as e:
109
  return f"PDF read error: {e}"
110
 
 
111
  # =========================
112
  # Main handler
113
  # =========================
 
116
  text = read_pdf(file)
117
  return summarize_long_text(text)
118
 
 
119
  # =========================
120
  # Gradio UI
121
  # =========================
122
  with gr.Blocks() as demo:
123
+ gr.Markdown("# 📄 Long Text Summarizer + AI Study Assistant")
124
  gr.Markdown(
125
  "• Handles **thousands of words**\n"
126
  "• Supports **PDF upload**\n"
127
  "• Optimized for **CPU / free tier**\n"
128
+ "• Includes **AI-generated study tips** based on the summary content"
129
  )
130
+
131
  text_input = gr.Textbox(
132
  lines=15,
133
  label="Paste text (optional)",
134
  placeholder="Paste lecture notes, textbook chapter, article..."
135
  )
136
+
137
  file_input = gr.File(
138
  label="Upload PDF (optional)",
139
  file_types=[".pdf"]
140
  )
141
+
142
  output = gr.Textbox(
143
  lines=16,
144
+ label="Summary + AI Study Advice",
145
+ placeholder="Summary appears first, followed by AI-generated learning tips..."
146
  )
147
+
148
+ summarize_btn = gr.Button("Summarize & Get AI Study Tips", variant="primary")
149
+
150
  summarize_btn.click(
151
  fn=process_input,
152
  inputs=[text_input, file_input],