Spaces:
Sleeping
Sleeping
aladhefafalquran commited on
Commit ·
2ae8413
1
Parent(s): df78b63
Transform into detailed Study Guide Generator
Browse files
Changed from short summaries to comprehensive study guides:
- Increased chunk size: 1000 → 2500 chars for more context
- Added chunk overlap: 500 chars to maintain continuity
- Increased output length: max_length 130 → 400 words
- Increased minimum length: 30 → 150 words for substantial content
- Added section markers for organization
- Formatted output as proper study guide with tips
- Updated UI labels: 'Summary' → 'Detailed Study Guide'
- Changed title to 'PDF Study Guide Generator'
Now generates long, detailed study notes covering all important
points - perfect for exam preparation, not just short summaries.
app.py
CHANGED
|
@@ -10,7 +10,7 @@ device = 0 if torch.cuda.is_available() else -1
|
|
| 10 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
|
| 11 |
print("Model ready!")
|
| 12 |
|
| 13 |
-
def
|
| 14 |
if pdf_file is None:
|
| 15 |
return "Please upload a PDF file."
|
| 16 |
|
|
@@ -26,32 +26,85 @@ def summarize_pdf(pdf_file):
|
|
| 26 |
if not text.strip():
|
| 27 |
return "PDF is empty or contains no text."
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
chunk_size =
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
# Summarize each chunk
|
| 34 |
-
summaries = []
|
| 35 |
for i, chunk in enumerate(chunks):
|
| 36 |
-
if len(chunk.strip()) < 50:
|
| 37 |
-
continue
|
| 38 |
try:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
continue
|
| 43 |
|
| 44 |
-
# Combine
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
# Create
|
| 49 |
demo = gr.Interface(
|
| 50 |
-
fn=
|
| 51 |
-
inputs=gr.File(label="Upload PDF"),
|
| 52 |
-
outputs=gr.Textbox(
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
)
|
| 56 |
|
| 57 |
if __name__ == "__main__":
|
|
|
|
| 10 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
|
| 11 |
print("Model ready!")
|
| 12 |
|
| 13 |
+
def create_study_guide(pdf_file):
|
| 14 |
if pdf_file is None:
|
| 15 |
return "Please upload a PDF file."
|
| 16 |
|
|
|
|
| 26 |
if not text.strip():
|
| 27 |
return "PDF is empty or contains no text."
|
| 28 |
|
| 29 |
+
# Better chunking - split by paragraphs/sections (larger chunks for detailed recap)
|
| 30 |
+
chunk_size = 2500 # Larger chunks for more context
|
| 31 |
+
chunk_overlap = 500 # Overlap to maintain context
|
| 32 |
+
|
| 33 |
+
chunks = []
|
| 34 |
+
for i in range(0, len(text), chunk_size - chunk_overlap):
|
| 35 |
+
chunk = text[i:i + chunk_size]
|
| 36 |
+
if len(chunk.strip()) > 100: # Only process meaningful chunks
|
| 37 |
+
chunks.append(chunk)
|
| 38 |
+
|
| 39 |
+
# Create detailed study notes for each section
|
| 40 |
+
study_notes = []
|
| 41 |
+
total_chunks = len(chunks)
|
| 42 |
|
|
|
|
|
|
|
| 43 |
for i, chunk in enumerate(chunks):
|
|
|
|
|
|
|
| 44 |
try:
|
| 45 |
+
# Generate longer, more detailed summaries (study guide style)
|
| 46 |
+
# max_length increased significantly for detailed recap
|
| 47 |
+
result = summarizer(
|
| 48 |
+
chunk,
|
| 49 |
+
max_length=400, # Much longer output for detailed study notes
|
| 50 |
+
min_length=150, # Ensure substantial content
|
| 51 |
+
do_sample=False,
|
| 52 |
+
truncation=True
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
section_note = result[0]['summary_text']
|
| 56 |
+
|
| 57 |
+
# Add section marker for organization
|
| 58 |
+
study_notes.append(f"### Section {i+1}/{total_chunks}\n{section_note}")
|
| 59 |
+
|
| 60 |
+
except Exception as e:
|
| 61 |
+
# If a chunk fails, continue with others
|
| 62 |
continue
|
| 63 |
|
| 64 |
+
# Combine all study notes
|
| 65 |
+
if not study_notes:
|
| 66 |
+
return "Could not generate study guide. Please try a different PDF."
|
| 67 |
+
|
| 68 |
+
# Create formatted study guide
|
| 69 |
+
study_guide = f"""# π Study Guide
|
| 70 |
+
|
| 71 |
+
**Total Sections:** {len(study_notes)}
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
"""
|
| 76 |
+
|
| 77 |
+
study_guide += "\n\n---\n\n".join(study_notes)
|
| 78 |
+
|
| 79 |
+
study_guide += f"""
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
## π Study Tips
|
| 84 |
+
|
| 85 |
+
- This guide covers the important points from your document
|
| 86 |
+
- Review each section carefully
|
| 87 |
+
- Focus on key concepts highlighted above
|
| 88 |
+
- Use this as your primary study material
|
| 89 |
+
|
| 90 |
+
*Generated study guide - {len(study_notes)} sections extracted*
|
| 91 |
+
"""
|
| 92 |
+
|
| 93 |
+
return study_guide
|
| 94 |
|
| 95 |
+
# Create interface
|
| 96 |
demo = gr.Interface(
|
| 97 |
+
fn=create_study_guide,
|
| 98 |
+
inputs=gr.File(label="π Upload PDF Document"),
|
| 99 |
+
outputs=gr.Textbox(
|
| 100 |
+
label="π Detailed Study Guide",
|
| 101 |
+
lines=30,
|
| 102 |
+
max_lines=50
|
| 103 |
+
),
|
| 104 |
+
title="π PDF Study Guide Generator",
|
| 105 |
+
description="Upload a PDF to generate a detailed study guide with important points for exam preparation. The guide will be comprehensive and cover all key concepts.",
|
| 106 |
+
examples=None,
|
| 107 |
+
allow_flagging="never"
|
| 108 |
)
|
| 109 |
|
| 110 |
if __name__ == "__main__":
|