Spaces:

Agents-MCP-Hackathon
/

pdf_explainer

Paused

App Files Files Community

spagestic committed on Jun 9, 2025

Commit

37d50b6

1 Parent(s): c2ba9da

feat: Enhance PDF processing to automatically extract text, generate explanations, and produce audio

Browse files

Files changed (1) hide show

_app.py +52 -42

_app.py CHANGED Viewed

@@ -7,23 +7,65 @@ load_dotenv()
 def main():
     """Main function to create and launch the interface."""
     def process_pdf(pdf_file):
-        """Process PDF and extract text automatically"""
         if pdf_file is None:
-            return "", "No PDF uploaded"
         try:
-            # Initialize extractor
             extractor = PDFTextExtractor()
-            # Extract text from PDF
             extracted_text, status, images_data = extractor.extract_text_from_pdf(pdf_file)
-            return extracted_text, status
         except Exception as e:
-            return "", f"Error processing PDF: {str(e)}"
     def generate_explanations(extracted_text):
         """Generate explanations for extracted text"""
@@ -120,58 +162,26 @@ def main():
                             show_copy_button=True,
                             interactive=False
                         )
                     with gr.TabItem("Explanation Script"):
-                        with gr.Row():
-                            generate_explanation_btn = gr.Button(
-                                "🤖 Generate Explanation Script",
-                                variant="primary",
-                                size="lg"
-                            )
                         explanation_output = gr.Textbox(
                             label="Generated Explanation Script",
                             lines=15,
-                            placeholder="Click 'Generate Explanation Script' after extracting text to get explanations...",
                             show_copy_button=True,
                             interactive=False
                         )
                 # Audio generation section (below tabs)
                 gr.Markdown("### 🔊 Audio Generation")
-                with gr.Row():
-                    generate_audio_btn = gr.Button(
-                        "🎵 Generate Explanation Audio",
-                        variant="secondary",
-                        size="lg"
-                    )
                 audio_output = gr.Audio(
                     label="Generated Explanation Audio",
                     interactive=False,
                     visible=False
-                )
-          # Set up automatic processing on PDF upload
         pdf_input.upload(
             fn=process_pdf,
             inputs=[pdf_input],
-            outputs=[text_output, status_output]
-        )
-        # Set up explanation generation button
-        generate_explanation_btn.click(
-            fn=generate_explanations,
-            inputs=[text_output],
-            outputs=[explanation_output],
-            show_progress=True
-        )
-        # Set up audio generation button
-        generate_audio_btn.click(
-            fn=generate_audio,
-            inputs=[explanation_output],
-            outputs=[audio_output, audio_output],
-            show_progress=True
         )
     return demo

 def main():
     """Main function to create and launch the interface."""
     def process_pdf(pdf_file):
         """Run the whole PDF pipeline, streaming a UI update after each stage.

         This is a generator used as the ``pdf_input.upload`` handler. Every
         yield is a 5-tuple that lines up with the handler's ``outputs`` list:
         extracted text, status, explanation script, audio value, and an
         update toggling the audio player's visibility.

         Stages: extract text -> generate explanations -> generate TTS audio.
         A failure in a later stage keeps the results of the earlier stages
         on screen and reports the error instead of discarding everything.

         Progress and error messages for the audio stage go to the status
         slot. The audio slot only ever receives ``None`` or the real TTS
         result, because a plain string given to an audio component is
         interpreted as a file path rather than displayed as text.
         """
         if pdf_file is None:
             yield "", "No PDF uploaded", "", None, gr.update(visible=False)
             return

         try:
             extractor = PDFTextExtractor()

             # Step 1: extract text. Emit a progress message first so the
             # user gets feedback while extraction is running.
             yield "", gr.update(value="Extracting text..."), "", None, gr.update(visible=False)
             extracted_text, status, _images_data = extractor.extract_text_from_pdf(pdf_file)

             if not extracted_text or not extracted_text.strip():
                 yield extracted_text, status, "No text available to explain.", None, gr.update(visible=False)
                 return

             # Show the extracted text right away; explanations are loading.
             yield extracted_text, status, gr.update(value="Generating explanations..."), None, gr.update(visible=False)

             # Step 2: generate explanations.
             try:
                 explanations = extractor.generate_explanations(extracted_text)

                 # Show the explanations right away; audio is loading. The
                 # progress note is appended to the status text and the
                 # audio slot stays empty.
                 yield extracted_text, f"{status}\nGenerating audio...", explanations, None, gr.update(visible=False)

                 # Step 3: generate audio.
                 try:
                     # Imported lazily so the TTS module is only loaded
                     # once audio is actually requested.
                     from ui.chatterbox.generate_tts_audio import generate_tts_audio

                     clean_text = explanations.strip()

                     # Limit text length for TTS (assuming a 1000 character
                     # limit). Cut at 950 to leave room for the notice that
                     # is appended below.
                     max_tts_chars = 1000
                     cut_at = 950
                     if len(clean_text) > max_tts_chars:
                         window = clean_text[:cut_at]
                         # Prefer to end on the last full stop in the window
                         # so the audio does not stop mid-sentence.
                         head, dot, _tail = window.rpartition('.')
                         clean_text = head + '.' if dot else window
                         clean_text += " [Text has been truncated for audio generation]"

                     audio_result = generate_tts_audio(clean_text, None)

                     # Everything is ready: restore the extraction status
                     # and reveal the audio player.
                     yield extracted_text, status, explanations, audio_result, gr.update(visible=True)
                 except Exception as audio_error:
                     # Keep text and explanations; report the audio failure
                     # in the status slot and leave the player hidden/empty.
                     yield extracted_text, f"{status}\nError generating audio: {audio_error}", explanations, None, gr.update(visible=False)
             except Exception as explanation_error:
                 # Keep the extracted text; report the explanation failure.
                 yield extracted_text, status, f"Error generating explanations: {str(explanation_error)}", None, gr.update(visible=False)
         except Exception as e:
             yield "", f"Error processing PDF: {str(e)}", "", None, gr.update(visible=False)
     def generate_explanations(extracted_text):
         """Generate explanations for extracted text"""
                             show_copy_button=True,
                             interactive=False
                         )
                     with gr.TabItem("Explanation Script"):
                         explanation_output = gr.Textbox(
                             label="Generated Explanation Script",
                             lines=15,
+                            placeholder="Explanations will be automatically generated after text extraction...",
                             show_copy_button=True,
                             interactive=False
                         )
                 # Audio generation section (below tabs)
                 gr.Markdown("### 🔊 Audio Generation")
                 audio_output = gr.Audio(
                     label="Generated Explanation Audio",
                     interactive=False,
                     visible=False
+                )        # Set up automatic processing on PDF upload (now handles all steps)
         pdf_input.upload(
             fn=process_pdf,
             inputs=[pdf_input],
+            outputs=[text_output, status_output, explanation_output, audio_output, audio_output]
         )
     return demo