pdf_explainer / src /processors /pdf_processor.py
spagestic's picture
feat: implement simplified audio processing with enhanced TTS API integration
8f3c067
"""PDF processing functionality."""
import gradio as gr
from .pdf_text_extractor import PDFTextExtractor
class PDFProcessor:
    """Handles PDF processing operations.

    Drives a three-step pipeline (text extraction, explanation generation,
    audio generation) and streams intermediate results so the UI can be
    refreshed after every step.
    """

    def __init__(self):
        self.extractor = PDFTextExtractor()

    @staticmethod
    def _stream_audio(audio_processor, explanations):
        """Run ``generate_audio`` in a worker thread and stream its events.

        A progress callback cannot ``yield`` on behalf of the generator that
        drives the UI: a callback containing ``yield`` is itself a generator
        function, so calling it never executes its body. Instead the callback
        pushes events onto a queue and this generator relays them.

        Args:
            audio_processor: Object exposing
                ``generate_audio(text, progress=callback)`` that returns a
                2-tuple whose first item is the audio result.
            explanations: Text handed to ``generate_audio``.

        Yields:
            ``("progress", desc)`` for every callback invocation, followed by
            exactly one ``("result", audio_result)``.

        Raises:
            Whatever ``generate_audio`` raised, re-raised in the caller's
            thread.
        """
        # Local imports keep this block self-contained, mirroring the
        # function-scope import already used for SimpleAudioProcessor.
        import queue
        import threading

        events = queue.Queue()

        def audio_progress(progress, desc=""):
            # Plain function (no ``yield``) so the body really runs when the
            # audio processor reports progress.
            events.put(("progress", desc))

        def worker():
            try:
                audio_result, _ = audio_processor.generate_audio(
                    explanations, progress=audio_progress
                )
            except BaseException as exc:
                # Forward every failure; otherwise the consumer below would
                # block forever waiting for a terminal event.
                events.put(("error", exc))
            else:
                events.put(("result", audio_result))

        threading.Thread(target=worker, daemon=True).start()

        while True:
            kind, payload = events.get()
            if kind == "error":
                raise payload
            yield kind, payload
            if kind == "result":
                return

    def process_pdf(self, pdf_file):
        """Process PDF and extract text, then explanations, then audio, updating UI at each step.

        Args:
            pdf_file: The uploaded PDF as passed on to
                ``PDFTextExtractor.extract_text_from_pdf``; ``None`` when
                nothing was uploaded.

        Yields:
            5-tuples ``(extracted_text, status, explanations, audio, update)``.
            ``update`` is a ``gr.update(visible=...)`` that is only made
            visible once audio has been generated successfully (presumably
            it targets the audio-related component; confirm against the UI
            wiring). Errors are reported through the status or explanation
            slot instead of being raised.
        """
        if pdf_file is None:
            yield "", "No PDF uploaded", "", None, gr.update(visible=False)
            return

        # Step 1: Extract text
        yield "", gr.update(value="Extracting text..."), "", None, gr.update(visible=False)
        try:
            # The third element (image data) is not used by this pipeline.
            extracted_text, status, _images_data = self.extractor.extract_text_from_pdf(pdf_file)
        except Exception as e:
            yield "", f"Error processing PDF: {str(e)}", "", None, gr.update(visible=False)
            return

        if not extracted_text or not extracted_text.strip():
            yield extracted_text, status, "No text available to explain.", None, gr.update(visible=False)
            return

        # Show extracted text immediately while explanations are loading.
        yield extracted_text, status, gr.update(value="Generating explanations..."), None, gr.update(visible=False)

        # Step 2: Generate explanations
        try:
            explanations = self.extractor.generate_explanations(extracted_text)
        except Exception as explanation_error:
            # Keep the extracted text visible, but surface the failure.
            yield extracted_text, status, f"Error generating explanations: {str(explanation_error)}", None, gr.update(visible=False)
            return

        # Show explanations immediately while audio is loading.
        yield extracted_text, gr.update(value="Generating audio..."), explanations, None, gr.update(visible=False)

        # Step 3: Generate audio
        audio_result = None
        try:
            from .simple_audio_processor import SimpleAudioProcessor

            audio_processor = SimpleAudioProcessor()
            for kind, payload in self._stream_audio(audio_processor, explanations):
                if kind == "progress":
                    yield extracted_text, gr.update(value=f"Generating audio: {payload}"), explanations, None, gr.update(visible=False)
                else:
                    audio_result = payload
        except Exception as audio_error:
            # Keep the explanations visible, but surface the audio failure.
            yield extracted_text, gr.update(value=f"Audio generation failed: {str(audio_error)}"), explanations, None, gr.update(visible=False)
            return

        # Everything succeeded: reveal the final component.
        yield extracted_text, gr.update(value="All steps complete!"), explanations, audio_result, gr.update(visible=True)