"""Simplified audio generation functionality that delegates complex processing to the TTS API.""" from typing import Tuple, Optional import gradio as gr import numpy as np class SimpleAudioProcessor: """Simplified audio processor that uses the enhanced TTS API for complex processing.""" def __init__(self): """Initialize the simple audio processor.""" pass def generate_audio(self, explanation_text: str, progress=None) -> Tuple[Tuple[int, np.ndarray], dict]: """ Generate TTS audio for explanations using the enhanced TTS API. This method sends the full text to the TTS API which handles: - Text chunking - Parallel processing - Audio concatenation - All on the server side with GPU acceleration Args: explanation_text: The text to convert to audio progress: Optional progress callback Returns: Tuple of (audio_result, update_dict) where audio_result is (sample_rate, audio_data) """ if not explanation_text or explanation_text.strip() == "": raise gr.Error("No explanations available to convert to audio. Please generate explanations first.") try: clean_text = explanation_text.strip() if progress: progress(0.1, desc="Sending text to TTS API for processing...") # Import the simplified audio generation function from .generate_simple_tts_audio import generate_simple_tts_audio # Generate audio using the new simplified API call audio_result = generate_simple_tts_audio(clean_text, progress=progress) if progress: progress(1.0, desc="Audio generation complete!") return audio_result, gr.update(visible=True) except Exception as e: raise gr.Error(f"Error generating audio: {str(e)}") def get_processing_info(self, text: str) -> dict: """Get basic information about the text to be processed.""" if not text or not text.strip(): return {"error": "No text provided"} text_length = len(text.strip()) estimated_chunks = max(1, text_length // 800) # Rough estimate estimated_time = text_length * 0.05 # Rough estimate: 0.05 seconds per character return { "processing_mode": "server_side_parallel", "text_length": text_length, "estimated_chunks": estimated_chunks, "estimated_time_seconds": estimated_time, "estimated_time_readable": f"{estimated_time:.1f} seconds" if estimated_time < 60 else f"{estimated_time/60:.1f} minutes", "note": "Processing handled by TTS API with GPU acceleration" }