aladhefafalquran committed on
Commit
b79eb66
·
1 Parent(s): 2ae8413

MAJOR UPGRADE: Enhanced Study Guide Generator

Browse files

Massive improvements to output quality and user experience:

✨ NEW FEATURES:
- 3 Detail Levels: Concise / Detailed / Very Detailed
- Smart sentence-boundary chunking (no mid-sentence cuts)
- Real-time progress updates with emojis
- Bullet point formatting for easy scanning
- Text cleaning (removes page numbers, fixes hyphenation)
- Better gr.Blocks UI with organized layout

📊 QUALITY IMPROVEMENTS:
- Intelligent text overlap between chunks for context
- Longer outputs: up to 500 words per section (Very Detailed)
- Extract key points as bullet lists
- Professional markdown formatting
- Document statistics (pages, words, sections)

📚 STUDY FEATURES:
- Comprehensive header with document info
- Organized sections with clear numbering
- Study synthesis section with tips
- How to use this guide instructions
- Study strategy recommendations

🎨 UX IMPROVEMENTS:
- Live progress indicators during processing
- Clear status messages with icons
- Better error handling
- Helpful tips in sidebar
- Larger output area (25-40 lines)

This transforms the tool from a basic summarizer into a
professional study guide generator perfect for exam prep!

Files changed (1) hide show
  1. app.py +229 -67
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  import fitz
4
  from transformers import pipeline
@@ -10,102 +11,263 @@ device = 0 if torch.cuda.is_available() else -1
10
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
11
  print("Model ready!")
12
 
13
- def create_study_guide(pdf_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  if pdf_file is None:
15
- return "Please upload a PDF file."
16
 
17
- # Extract text
18
- text = ""
19
  try:
 
 
 
20
  with fitz.open(pdf_file.name) as doc:
21
- for page in doc:
 
22
  text += page.get_text()
23
- except Exception as e:
24
- return f"Error reading PDF: {str(e)}"
25
 
26
- if not text.strip():
27
- return "PDF is empty or contains no text."
 
28
 
29
- # Better chunking - split by paragraphs/sections (larger chunks for detailed recap)
30
- chunk_size = 2500 # Larger chunks for more context
31
- chunk_overlap = 500 # Overlap to maintain context
 
32
 
33
- chunks = []
34
- for i in range(0, len(text), chunk_size - chunk_overlap):
35
- chunk = text[i:i + chunk_size]
36
- if len(chunk.strip()) > 100: # Only process meaningful chunks
37
- chunks.append(chunk)
 
 
 
 
 
 
 
 
38
 
39
- # Create detailed study notes for each section
40
- study_notes = []
41
- total_chunks = len(chunks)
 
42
 
43
- for i, chunk in enumerate(chunks):
44
- try:
45
- # Generate longer, more detailed summaries (study guide style)
46
- # max_length increased significantly for detailed recap
47
- result = summarizer(
48
- chunk,
49
- max_length=400, # Much longer output for detailed study notes
50
- min_length=150, # Ensure substantial content
51
- do_sample=False,
52
- truncation=True
53
- )
 
 
 
 
 
 
 
 
 
54
 
55
- section_note = result[0]['summary_text']
 
 
 
 
56
 
57
- # Add section marker for organization
58
- study_notes.append(f"### Section {i+1}/{total_chunks}\n{section_note}")
59
 
60
- except Exception as e:
61
- # If a chunk fails, continue with others
62
- continue
63
 
64
- # Combine all study notes
65
- if not study_notes:
66
- return "Could not generate study guide. Please try a different PDF."
67
 
68
- # Create formatted study guide
69
- study_guide = f"""# πŸ“š Study Guide
70
 
71
- **Total Sections:** {len(study_notes)}
 
 
 
 
72
 
73
  ---
74
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  """
76
 
77
- study_guide += "\n\n---\n\n".join(study_notes)
 
 
78
 
79
- study_guide += f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  ---
82
 
83
- ## πŸ“– Study Tips
 
 
 
 
 
 
 
 
84
 
85
- - This guide covers the important points from your document
86
- - Review each section carefully
87
- - Focus on key concepts highlighted above
88
- - Use this as your primary study material
89
 
90
- *Generated study guide - {len(study_notes)} sections extracted*
91
  """
92
 
93
- return study_guide
94
-
95
- # Create interface
96
- demo = gr.Interface(
97
- fn=create_study_guide,
98
- inputs=gr.File(label="πŸ“„ Upload PDF Document"),
99
- outputs=gr.Textbox(
100
- label="πŸ“š Detailed Study Guide",
101
- lines=30,
102
- max_lines=50
103
- ),
104
- title="πŸ“š PDF Study Guide Generator",
105
- description="Upload a PDF to generate a detailed study guide with important points for exam preparation. The guide will be comprehensive and cover all key concepts.",
106
- examples=None,
107
- allow_flagging="never"
108
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  if __name__ == "__main__":
111
  demo.launch()
 
1
  import os
2
+ import re
3
  import gradio as gr
4
  import fitz
5
  from transformers import pipeline
 
11
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
12
  print("Model ready!")
13
 
14
def clean_text(text: str) -> str:
    """Clean and normalize text extracted from a PDF.

    The steps run in this order on purpose: the page-number rule is
    line-based, so it has to run while line breaks still exist. Collapsing
    whitespace first would remove every newline and make that rule a no-op.

    1. Drop lines that consist solely of digits (typical page numbers).
    2. Re-join words that were hyphenated across a space or line break.
    3. Collapse every run of whitespace into a single space.

    Args:
        text: Raw extracted text. Empty or ``None`` input yields ``""``.

    Returns:
        The cleaned text on a single line, without leading or trailing
        whitespace.
    """
    if not text:
        return ""

    # Remove standalone page-number lines while newlines are still present.
    text = re.sub(r'(?m)^[ \t\r]*\d+[ \t\r]*$', '', text)
    # Fix words split by a hyphen followed by whitespace ("exam-\nple").
    text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)
    # Finally flatten all remaining whitespace, including newlines.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
23
+
24
def _split_oversized(sentence, limit):
    """Split one over-long sentence into pieces of at most ``limit`` characters."""
    pieces = []
    current = ""
    for word in sentence.split():
        # A single token longer than the limit has no word boundary to use,
        # so it is cut hard.
        while len(word) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:limit])
            word = word[limit:]
        if not current:
            current = word
        elif len(current) + 1 + len(word) <= limit:
            current += " " + word
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def _tail_within(text, max_chars):
    """Return the trailing whole words of ``text`` that fit in ``max_chars``."""
    tail = []
    used = 0
    for word in reversed(text.split()):
        needed = len(word) + (1 if tail else 0)
        if used + needed > max_chars:
            break
        tail.append(word)
        used += needed
    return " ".join(reversed(tail))


def smart_chunk_text(text, chunk_size=3000, overlap=600):
    """Split text into chunks, breaking at sentence boundaries where possible.

    Sentences are packed greedily into chunks of at most ``chunk_size``
    characters. A sentence that is itself longer than ``chunk_size`` (for
    example text without any sentence punctuation) is split at word
    boundaries instead of being emitted as one oversized chunk.

    Every chunk after the first is then prefixed with the tail of the
    previous chunk for context. ``overlap`` is measured in characters, the
    same unit as ``chunk_size``, and the tail is made of whole words only.
    The prefix is added on top of ``chunk_size``.

    Args:
        text: The text to split. Empty or whitespace-only text gives ``[]``.
        chunk_size: Maximum characters per chunk before overlap; must be > 0.
        overlap: Maximum characters of context copied from the previous
            chunk. Zero or a negative value disables the overlap.

    Returns:
        list[str]: The chunks in document order.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of characters")
    if not text or not text.strip():
        return []

    # Break into sentence-sized units, splitting any unit that is too long.
    units = []
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= chunk_size:
            units.append(sentence)
        else:
            units.extend(_split_oversized(sentence, chunk_size))

    # Greedily pack the units into chunks.
    chunks = []
    current = []
    current_len = 0
    for unit in units:
        needed = len(unit) + (1 if current else 0)
        if current and current_len + needed > chunk_size:
            chunks.append(" ".join(current))
            current = [unit]
            current_len = len(unit)
        else:
            current.append(unit)
            current_len += needed
    if current:
        chunks.append(" ".join(current))

    if overlap <= 0:
        return chunks

    # Prefix each chunk with the tail of the previous (un-prefixed) chunk.
    overlapped_chunks = []
    for index, chunk in enumerate(chunks):
        if index > 0:
            context = _tail_within(chunks[index - 1], overlap)
            if context:
                chunk = context + " " + chunk
        overlapped_chunks.append(chunk)
    return overlapped_chunks
53
+
54
def extract_key_points(summary_text: str) -> str:
    """Format a summary as one bullet point per sentence.

    Sentences of 20 characters or fewer are dropped as too slight to be a
    key point. If that filter would discard everything, the whole summary is
    kept as a single bullet so a non-empty summary never turns into an empty
    section.

    Args:
        summary_text: The summary to format. Empty or ``None`` yields ``""``.

    Returns:
        The bullet lines joined with newlines, each starting with ``"• "``.
    """
    if not summary_text or not summary_text.strip():
        return ""

    sentences = (
        part.strip() for part in re.split(r'(?<=[.!?])\s+', summary_text)
    )
    bullet_points = [f"• {s}" for s in sentences if len(s) > 20]

    if not bullet_points:
        # Every sentence was short; keep the content instead of losing it.
        return f"• {summary_text.strip()}"
    return "\n".join(bullet_points)
67
+
68
def create_study_guide(pdf_file, detail_level="Detailed"):
    """Build a Markdown study guide from a PDF, yielding progress as it goes.

    This is a generator. Every value it yields is a string meant to replace
    the previous one in the output box: short status messages first, then
    either the finished study guide or an error message as the last value.

    Args:
        pdf_file: The uploaded PDF, given either as an object whose ``name``
            attribute is the file path or as the path string itself.
            ``None`` means nothing was uploaded.
        detail_level: ``"Concise"``, ``"Detailed"`` or ``"Very Detailed"``.
            Any other value is treated like ``"Concise"``.

    Yields:
        str: Progress messages, followed by the final guide or an error text.
    """
    if pdf_file is None:
        # Inside a generator, ``return "text"`` never reaches the caller's
        # loop, so the message has to be yielded to become visible.
        yield "⚠️ Please upload a PDF file first."
        return

    # Accept both a file-like object exposing ``.name`` and a plain path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # detail level -> (chunk_size in characters, max_length, min_length);
    # the two length bounds are handed to the summarizer unchanged.
    settings_by_level = {
        "Very Detailed": (3500, 500, 200),
        "Detailed": (3000, 400, 150),
    }
    concise_settings = (2500, 300, 100)

    try:
        # Extract text
        yield "📄 Extracting text from PDF..."
        page_texts = []
        with fitz.open(pdf_path) as doc:
            total_pages = len(doc)
            for page_num, page in enumerate(doc, 1):
                page_texts.append(page.get_text())
                if page_num % 5 == 0:
                    yield f"📄 Reading pages... {page_num}/{total_pages}"
        text = "".join(page_texts)

        if not text.strip():
            yield "❌ PDF is empty or contains no readable text."
            return

        # Clean text
        yield "🧹 Cleaning and processing text..."
        text = clean_text(text)
        word_count = len(text.split())

        chunk_size, max_length, min_length = settings_by_level.get(
            detail_level, concise_settings
        )

        # Smart chunking
        yield "📝 Dividing into logical sections..."
        chunks = smart_chunk_text(text, chunk_size=chunk_size, overlap=100)
        total_chunks = len(chunks)

        # Summarize each chunk; one failing chunk must not abort the guide.
        study_sections = []
        for number, chunk in enumerate(chunks, 1):
            yield f"🤖 Generating study notes for section {number}/{total_chunks}..."
            try:
                result = summarizer(
                    chunk,
                    max_length=max_length,
                    min_length=min_length,
                    do_sample=False,
                    truncation=True,
                    early_stopping=True,
                )
            except Exception as exc:
                # Best effort: skip the section, but leave a trace in the log
                # instead of hiding the failure completely.
                print(f"Skipping section {number}/{total_chunks}: {exc}")
                continue

            section_summary = result[0]['summary_text']
            study_sections.append((number, extract_key_points(section_summary)))

        if not study_sections:
            yield "❌ Could not generate study guide. Please try a different PDF."
            return

        # Create comprehensive study guide
        yield "✨ Formatting your study guide..."

        parts = [f"""# 📚 COMPREHENSIVE STUDY GUIDE

**Document:** {os.path.basename(pdf_path)}
**Pages:** {total_pages}
**Words in Original:** {word_count:,}
**Study Sections Generated:** {len(study_sections)}
**Detail Level:** {detail_level}

---

## 📖 KEY CONCEPTS AND IMPORTANT POINTS

"""]

        # Add all sections
        for number, content in study_sections:
            parts.append(f"""
### 📌 Section {number} of {total_chunks}

{content}

---
""")

        # Add synthesis section if we have multiple sections
        if len(study_sections) > 1:
            parts.append("""

## 🎯 STUDY SYNTHESIS

This study guide extracted the most important points from your document. Each section above covers key concepts you need to understand.

### 📋 How to Use This Guide:

1. **First Read**: Go through all sections to get an overview
2. **Deep Dive**: Study each section carefully, one at a time
3. **Make Connections**: Link concepts between different sections
4. **Active Recall**: Try to remember key points without looking
5. **Review Regularly**: Come back to this guide before your exam

### 💡 Study Tips:

• Focus on understanding the concepts, not memorizing word-for-word
• Create your own examples for each key point
• Explain these concepts to someone else to test your understanding
• Highlight or annotate the most important points for quick review

---

""")

        parts.append(f"""
## ✅ STUDY GUIDE COMPLETE

**Total Sections Processed:** {len(study_sections)}/{total_chunks}
**Coverage:** Comprehensive overview of all important topics

*This study guide was generated using AI to extract and organize the most relevant information for your studies. Use it as your primary study material alongside your class notes.*

---

📚 Good luck with your studies! 🎓
""")

        yield "".join(parts)

    except Exception as e:
        yield f"❌ Error: {str(e)}\n\nPlease try uploading the PDF again."
216
+
217
# Build the two-column interface: upload and options on the left, the
# generated guide on the right.
with gr.Blocks(title="PDF Study Guide Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📚 AI-Powered Study Guide Generator

    Upload your PDF and get a comprehensive, well-organized study guide perfect for exam preparation!
    """)

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(
                label="📄 Upload Your PDF Document",
                file_types=[".pdf"]
            )

            detail_level = gr.Radio(
                choices=["Concise", "Detailed", "Very Detailed"],
                value="Detailed",
                label="📊 Detail Level",
                info="Choose how comprehensive you want your study guide"
            )

            generate_btn = gr.Button("🚀 Generate Study Guide", variant="primary", size="lg")

            gr.Markdown("""
            ### 💡 Tips:
            - **Concise**: Quick overview, main points only
            - **Detailed**: Balanced coverage (recommended)
            - **Very Detailed**: Comprehensive, thorough notes
            """)

        with gr.Column():
            output = gr.Textbox(
                label="📚 Your Study Guide",
                lines=25,
                max_lines=40,
                placeholder="Your detailed study guide will appear here...\n\nIt will include:\n• Organized sections\n• Key concepts\n• Important points\n• Study tips"
            )

    # create_study_guide is a generator; each yielded string replaces the
    # content of the output box, which gives the live progress display.
    generate_btn.click(
        fn=create_study_guide,
        inputs=[pdf_input, detail_level],
        outputs=output
    )

    gr.Markdown("""
    ---
    ### 🎯 What You'll Get:
    - ✅ Comprehensive coverage of all important topics
    - ✅ Organized in easy-to-study sections
    - ✅ Bullet points for quick scanning
    - ✅ Study tips and guidance
    - ✅ Perfect for exam preparation
    """)

if __name__ == "__main__":
    # Streaming (generator) handlers need the queue; enabling it explicitly
    # keeps this working on Gradio versions where it is not on by default.
    demo.queue()
    demo.launch()