Spaces:
Sleeping
Sleeping
File size: 2,908 Bytes
8f3c067 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
"""Simplified audio generation functionality that delegates complex processing to the TTS API."""
from typing import Tuple, Optional
import gradio as gr
import numpy as np
class SimpleAudioProcessor:
    """Simplified audio processor that uses the enhanced TTS API for complex processing.

    All heavy lifting (text chunking, parallel synthesis, audio concatenation)
    is delegated to the TTS API server side; this class is a thin, stateless
    client-side wrapper.
    """

    def __init__(self):
        """Initialize the simple audio processor (stateless; nothing to set up)."""
        pass

    def generate_audio(self, explanation_text: str, progress=None) -> Tuple[Tuple[int, np.ndarray], dict]:
        """
        Generate TTS audio for explanations using the enhanced TTS API.

        This method sends the full text to the TTS API which handles:
        - Text chunking
        - Parallel processing
        - Audio concatenation
        - All on the server side with GPU acceleration

        Args:
            explanation_text: The text to convert to audio.
            progress: Optional Gradio-style progress callback, called as
                progress(fraction, desc=...).

        Returns:
            Tuple of (audio_result, update_dict) where audio_result is
            (sample_rate, audio_data) and update_dict makes the audio
            component visible.

        Raises:
            gr.Error: If no text is provided or audio generation fails.
        """
        if not explanation_text or not explanation_text.strip():
            raise gr.Error("No explanations available to convert to audio. Please generate explanations first.")

        clean_text = explanation_text.strip()
        if progress:
            progress(0.1, desc="Sending text to TTS API for processing...")

        try:
            # Local import keeps module load light and avoids a potential
            # circular import with the sibling module.
            from .generate_simple_tts_audio import generate_simple_tts_audio
            # Generate audio using the simplified API call; the server does
            # chunking / parallelism / concatenation.
            audio_result = generate_simple_tts_audio(clean_text, progress=progress)
        except gr.Error:
            # Already a user-facing error from the API layer — don't re-wrap
            # it into "Error generating audio: Error: ...".
            raise
        except Exception as e:
            # Chain the cause so the original traceback survives for debugging.
            raise gr.Error(f"Error generating audio: {str(e)}") from e

        if progress:
            progress(1.0, desc="Audio generation complete!")
        return audio_result, gr.update(visible=True)

    def get_processing_info(self, text: str) -> dict:
        """Get basic information about the text to be processed.

        Args:
            text: Candidate text for TTS processing.

        Returns:
            A dict of rough size/time estimates, or {"error": ...} when no
            usable (non-whitespace) text was supplied.
        """
        if not text or not text.strip():
            return {"error": "No text provided"}

        text_length = len(text.strip())
        estimated_chunks = max(1, text_length // 800)  # Rough estimate: ~800 chars per chunk
        estimated_time = text_length * 0.05  # Rough estimate: 0.05 seconds per character
        return {
            "processing_mode": "server_side_parallel",
            "text_length": text_length,
            "estimated_chunks": estimated_chunks,
            "estimated_time_seconds": estimated_time,
            "estimated_time_readable": (
                f"{estimated_time:.1f} seconds"
                if estimated_time < 60
                else f"{estimated_time / 60:.1f} minutes"
            ),
            "note": "Processing handled by TTS API with GPU acceleration",
        }
|