Spaces:
Sleeping
Sleeping
File size: 2,908 Bytes
8f3c067 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
"""Simplified audio generation functionality that delegates complex processing to the TTS API."""
from typing import Tuple, Optional
import gradio as gr
import numpy as np
class SimpleAudioProcessor:
    """Simplified audio processor that uses the enhanced TTS API for complex processing.

    All heavy lifting (text chunking, parallel synthesis, audio concatenation)
    is delegated to the TTS API server side; this class is a thin, stateless
    client-side wrapper.
    """

    def __init__(self):
        """Initialize the simple audio processor (stateless; nothing to set up)."""
        pass

    def generate_audio(self, explanation_text: str, progress=None) -> Tuple[Tuple[int, np.ndarray], dict]:
        """
        Generate TTS audio for explanations using the enhanced TTS API.

        This method sends the full text to the TTS API which handles:
        - Text chunking
        - Parallel processing
        - Audio concatenation
        - All on the server side with GPU acceleration

        Args:
            explanation_text: The text to convert to audio.
            progress: Optional Gradio-style progress callback, called as
                progress(fraction, desc=...).

        Returns:
            Tuple of (audio_result, update_dict) where audio_result is
            (sample_rate, audio_data) and update_dict makes the audio
            component visible.

        Raises:
            gr.Error: If no text is provided or audio generation fails.
        """
        if not explanation_text or not explanation_text.strip():
            raise gr.Error("No explanations available to convert to audio. Please generate explanations first.")

        clean_text = explanation_text.strip()
        if progress:
            progress(0.1, desc="Sending text to TTS API for processing...")

        try:
            # Local import keeps module load light and avoids a potential
            # circular import with the sibling module.
            from .generate_simple_tts_audio import generate_simple_tts_audio
            # Generate audio using the simplified API call; the server does
            # chunking / parallelism / concatenation.
            audio_result = generate_simple_tts_audio(clean_text, progress=progress)
        except gr.Error:
            # Already a user-facing error from the API layer — don't re-wrap
            # it into "Error generating audio: Error: ...".
            raise
        except Exception as e:
            # Chain the cause so the original traceback survives for debugging.
            raise gr.Error(f"Error generating audio: {str(e)}") from e

        if progress:
            progress(1.0, desc="Audio generation complete!")
        return audio_result, gr.update(visible=True)

    def get_processing_info(self, text: str) -> dict:
        """Get basic information about the text to be processed.

        Args:
            text: Candidate text for TTS processing.

        Returns:
            A dict of rough size/time estimates, or {"error": ...} when no
            usable (non-whitespace) text was supplied.
        """
        if not text or not text.strip():
            return {"error": "No text provided"}

        text_length = len(text.strip())
        estimated_chunks = max(1, text_length // 800)  # Rough estimate: ~800 chars per chunk
        estimated_time = text_length * 0.05  # Rough estimate: 0.05 seconds per character
        return {
            "processing_mode": "server_side_parallel",
            "text_length": text_length,
            "estimated_chunks": estimated_chunks,
            "estimated_time_seconds": estimated_time,
            "estimated_time_readable": (
                f"{estimated_time:.1f} seconds"
                if estimated_time < 60
                else f"{estimated_time / 60:.1f} minutes"
            ),
            "note": "Processing handled by TTS API with GPU acceleration",
        }
|