Update app.py
Browse files
app.py
CHANGED
|
@@ -153,25 +153,27 @@ def generate_qa_pairs(chunk: str, num_questions: int = 2) -> List[Dict[str, str]
|
|
| 153 |
def process_pdf(pdf_file, questions_per_chunk: int = 2, max_chunks: int = 20):
|
| 154 |
"""Main processing function."""
|
| 155 |
if pdf_file is None:
|
| 156 |
-
return "Please upload a PDF file.",
|
| 157 |
|
| 158 |
try:
|
| 159 |
# Extract text
|
| 160 |
-
yield "π Extracting text from PDF...",
|
| 161 |
raw_text = extract_text_from_pdf(pdf_file)
|
| 162 |
|
| 163 |
if raw_text.startswith("Error"):
|
| 164 |
-
|
|
|
|
| 165 |
|
| 166 |
if len(raw_text.strip()) < 100:
|
| 167 |
-
|
|
|
|
| 168 |
|
| 169 |
# Clean text
|
| 170 |
-
yield "🧹 Cleaning text...",
|
| 171 |
cleaned_text = clean_text(raw_text)
|
| 172 |
|
| 173 |
# Chunk text
|
| 174 |
-
yield "✂️ Chunking text into sections...",
|
| 175 |
chunks = chunk_text(cleaned_text)
|
| 176 |
|
| 177 |
# Limit chunks for CPU performance
|
|
@@ -183,16 +185,17 @@ def process_pdf(pdf_file, questions_per_chunk: int = 2, max_chunks: int = 20):
|
|
| 183 |
|
| 184 |
for i, chunk in enumerate(chunks):
|
| 185 |
progress = f"π΄ Generating flashcards... ({i+1}/{total_chunks} chunks processed)"
|
| 186 |
-
yield progress,
|
| 187 |
|
| 188 |
cards = generate_qa_pairs(chunk, questions_per_chunk)
|
| 189 |
all_flashcards.extend(cards)
|
| 190 |
|
| 191 |
if not all_flashcards:
|
| 192 |
-
|
|
|
|
| 193 |
|
| 194 |
# Format output
|
| 195 |
-
yield "✅ Finalizing...",
|
| 196 |
|
| 197 |
# Create formatted display
|
| 198 |
display_text = format_flashcards_display(all_flashcards)
|
|
@@ -208,10 +211,13 @@ def process_pdf(pdf_file, questions_per_chunk: int = 2, max_chunks: int = 20):
|
|
| 208 |
csv_lines.append(f'"{q}","{a}"')
|
| 209 |
csv_output = "\n".join(csv_lines)
|
| 210 |
|
| 211 |
-
|
|
|
|
| 212 |
|
| 213 |
except Exception as e:
|
| 214 |
-
|
|
|
|
|
|
|
| 215 |
|
| 216 |
def format_flashcards_display(flashcards: List[Dict]) -> str:
|
| 217 |
"""Format flashcards for nice display."""
|
|
@@ -335,21 +341,11 @@ with gr.Blocks(css=custom_css, title="PDF to Flashcards") as demo:
|
|
| 335 |
)
|
| 336 |
gr.Markdown("*Raw JSON data for custom applications*")
|
| 337 |
|
| 338 |
-
#
|
| 339 |
-
def update_display(status):
|
| 340 |
-
"""Update display when processing is done."""
|
| 341 |
-
if status and not status.startswith(("π", "🧹", "✂️", "π΄", "✅")):
|
| 342 |
-
return status
|
| 343 |
-
return gr.update()
|
| 344 |
-
|
| 345 |
process_btn.click(
|
| 346 |
fn=process_pdf,
|
| 347 |
inputs=[pdf_input, questions_per_chunk, max_chunks],
|
| 348 |
-
outputs=[status_text, csv_output, json_output]
|
| 349 |
-
).then(
|
| 350 |
-
fn=update_display,
|
| 351 |
-
inputs=status_text,
|
| 352 |
-
outputs=output_display
|
| 353 |
)
|
| 354 |
|
| 355 |
# Example section
|
|
|
|
| 153 |
def process_pdf(pdf_file, questions_per_chunk: int = 2, max_chunks: int = 20):
|
| 154 |
"""Main processing function."""
|
| 155 |
if pdf_file is None:
|
| 156 |
+
return "Please upload a PDF file.", "", "", "Your flashcards will appear here..."
|
| 157 |
|
| 158 |
try:
|
| 159 |
# Extract text
|
| 160 |
+
yield "π Extracting text from PDF...", "", "", "Processing..."
|
| 161 |
raw_text = extract_text_from_pdf(pdf_file)
|
| 162 |
|
| 163 |
if raw_text.startswith("Error"):
|
| 164 |
+
yield raw_text, "", "", "Error occurred"
|
| 165 |
+
return
|
| 166 |
|
| 167 |
if len(raw_text.strip()) < 100:
|
| 168 |
+
yield "PDF appears to be empty or contains no extractable text.", "", "", "Error occurred"
|
| 169 |
+
return
|
| 170 |
|
| 171 |
# Clean text
|
| 172 |
+
yield "🧹 Cleaning text...", "", "", "Processing..."
|
| 173 |
cleaned_text = clean_text(raw_text)
|
| 174 |
|
| 175 |
# Chunk text
|
| 176 |
+
yield "✂️ Chunking text into sections...", "", "", "Processing..."
|
| 177 |
chunks = chunk_text(cleaned_text)
|
| 178 |
|
| 179 |
# Limit chunks for CPU performance
|
|
|
|
| 185 |
|
| 186 |
for i, chunk in enumerate(chunks):
|
| 187 |
progress = f"π΄ Generating flashcards... ({i+1}/{total_chunks} chunks processed)"
|
| 188 |
+
yield progress, "", "", "Processing..."
|
| 189 |
|
| 190 |
cards = generate_qa_pairs(chunk, questions_per_chunk)
|
| 191 |
all_flashcards.extend(cards)
|
| 192 |
|
| 193 |
if not all_flashcards:
|
| 194 |
+
yield "Could not generate flashcards from this PDF. Try a PDF with more textual content.", "", "", "No flashcards generated"
|
| 195 |
+
return
|
| 196 |
|
| 197 |
# Format output
|
| 198 |
+
yield "✅ Finalizing...", "", "", "Almost done..."
|
| 199 |
|
| 200 |
# Create formatted display
|
| 201 |
display_text = format_flashcards_display(all_flashcards)
|
|
|
|
| 211 |
csv_lines.append(f'"{q}","{a}"')
|
| 212 |
csv_output = "\n".join(csv_lines)
|
| 213 |
|
| 214 |
+
# FINAL OUTPUT - this updates all components
|
| 215 |
+
yield "✅ Done! Generated {} flashcards".format(len(all_flashcards)), csv_output, json_output, display_text
|
| 216 |
|
| 217 |
except Exception as e:
|
| 218 |
+
error_msg = f"Error processing PDF: {str(e)}"
|
| 219 |
+
print(error_msg)
|
| 220 |
+
yield error_msg, "", "", error_msg
|
| 221 |
|
| 222 |
def format_flashcards_display(flashcards: List[Dict]) -> str:
|
| 223 |
"""Format flashcards for nice display."""
|
|
|
|
| 341 |
)
|
| 342 |
gr.Markdown("*Raw JSON data for custom applications*")
|
| 343 |
|
| 344 |
+
# FIXED: Direct binding without the broken .then() chain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
process_btn.click(
|
| 346 |
fn=process_pdf,
|
| 347 |
inputs=[pdf_input, questions_per_chunk, max_chunks],
|
| 348 |
+
outputs=[status_text, csv_output, json_output, output_display]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
)
|
| 350 |
|
| 351 |
# Example section
|