Spaces:
Build error
Build error
| import os | |
| import tempfile | |
| from TTS.api import TTS as TTSClass | |
| from PyPDF2 import PdfReader | |
| import gradio as gr | |
| import torch | |
| import textwrap | |
| import time | |
| import zipfile | |
| from pydub import AudioSegment | |
| # Registro seguro compatible con PyTorch 2.6+ | |
| from TTS.tts.configs.xtts_config import XttsConfig, XttsAudioConfig | |
| from TTS.tts.models.xtts import XttsArgs | |
| from TTS.config.shared_configs import BaseDatasetConfig | |
# Allow-list the XTTS configuration classes for torch's restricted
# (weights_only) unpickler so the checkpoint can be loaded on PyTorch 2.6+.
# add_safe_globals is documented to take a list, so a list (not a set) is
# passed here.
torch.serialization.add_safe_globals(
    [
        XttsConfig,
        XttsAudioConfig,
        XttsArgs,
        BaseDatasetConfig,
    ]
)

# NOTE(review): presumably lets Coqui TTS skip its interactive terms-of-service
# prompt when the model is downloaded -- confirm against the TTS package.
os.environ["COQUI_TOS_AGREED"] = "1"

# Reference recording passed as speaker_wav to every synthesis call.
voz_path = "Oscar.wav"

# Module-level placeholder; the visible code never assigns a model to it.
tts = None
def cargar_modelo():
    """Build and return a new XTTS v2 model instance created with gpu=False.

    Nothing is cached: every call constructs a fresh ``TTSClass`` object.
    """
    print("Cargando modelo TTS desde cero...")
    modelo = TTSClass(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=False,
    )
    return modelo
def extraer_texto_pdf(ruta_pdf):
    """Extract the text of every page of a PDF file.

    The per-page texts are joined with a form-feed character (U+000C) so that
    callers can recover page boundaries by splitting on it, and so that the
    last word of one page is not glued to the first word of the next.

    Args:
        ruta_pdf: Path (or file-like object) accepted by ``PdfReader``.

    Returns:
        The extracted text with pages separated by a form feed, or ``None``
        if the PDF could not be read. Pages without extractable text
        contribute an empty string.
    """
    try:
        reader = PdfReader(ruta_pdf)
        # extract_text() may return None for pages without a text layer.
        return "\f".join(pagina.extract_text() or "" for pagina in reader.pages)
    except Exception as e:
        # Best-effort: any failure is reported and signalled to the caller
        # through the None return value.
        print(f"❌ Error al extraer texto del PDF: {e}")
        return None
def dividir_en_fragmentos(texto, max_len=239):
    """Split ``texto`` into whitespace-wrapped fragments of at most ``max_len`` characters.

    Words are never broken, neither in the middle nor at hyphens, so a single
    word longer than ``max_len`` yields a fragment that exceeds the limit.
    Returns a list of strings (empty for blank input).
    """
    envoltura = textwrap.TextWrapper(
        width=max_len,
        break_long_words=False,
        break_on_hyphens=False,
    )
    return envoltura.wrap(texto)
def unir_audios(lista_rutas):
    """Concatenate several WAV files into one new temporary WAV file.

    Args:
        lista_rutas: Iterable of paths to WAV files, in playback order.

    Returns:
        Path of the newly created WAV file containing all inputs in order.
        The input files are deleted once the combined file has been written.

    Raises:
        ValueError: If ``lista_rutas`` is empty.
    """
    # Materialize once so the paths can be walked twice (read, then delete).
    rutas = list(lista_rutas)
    if not rutas:
        # Fail before creating any temp file instead of crashing on None.export.
        raise ValueError("No hay audios para unir")

    combined = None
    for wav_file in rutas:
        seg = AudioSegment.from_wav(wav_file)
        combined = seg if combined is None else combined + seg

    # Only reserve a unique name; close the handle before exporting to it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_file = tmp.name
    try:
        combined.export(out_file, format="wav")
    except Exception:
        # Do not leave a half-written output file behind.
        if os.path.exists(out_file):
            os.remove(out_file)
        raise

    # The inputs are removed only after the export succeeded.
    for f in rutas:
        os.remove(f)
    return out_file
def _sintetizar_fragmento(modelo, fragmento):
    """Synthesize one text fragment with the reference voice; return the WAV path."""
    # Only reserve a unique name; the handle is closed before the TTS engine
    # writes to the path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        ruta_wav = tmp.name
    try:
        modelo.tts_to_file(
            text=fragmento,
            file_path=ruta_wav,
            speaker_wav=voz_path,
            language="es",
        )
    except Exception:
        # Do not leak the temp file of a failed synthesis.
        if os.path.exists(ruta_wav):
            os.remove(ruta_wav)
        raise
    return ruta_wav


def leer_con_voz(texto, archivo_pdf, paginas_por_bloque):
    """Convert a PDF or free text to speech using the reference voice.

    When ``archivo_pdf`` is given it takes precedence over ``texto``: the PDF
    text is split into pages on form feeds, pages are grouped into blocks of
    ``paginas_por_bloque``, and every fragment of every block is written as a
    separate WAV entry into a ZIP archive. Otherwise ``texto`` is synthesized
    fragment by fragment and merged into a single WAV file.

    Args:
        texto: Text to read aloud; used only when no PDF is supplied.
        archivo_pdf: Uploaded PDF, either a path string or an object whose
            ``name`` attribute is the path; falsy when absent.
        paginas_por_bloque: Number of pages per block (coerced to an int >= 1).

    Returns:
        A ``(status_message, audio_path, zip_path)`` tuple. At most one of the
        two paths is set; both are ``None`` when an error is reported.
    """
    # Check the cheap precondition before paying for the model load.
    if not os.path.exists(voz_path):
        return "❌ No se encontró el archivo de voz Oscar.wav", None, None

    if archivo_pdf:
        # Accept both a plain path and a file-like object exposing ``.name``.
        ruta_pdf = getattr(archivo_pdf, "name", archivo_pdf)
        texto_completo = extraer_texto_pdf(ruta_pdf)
        if texto_completo is None:
            return "❌ Error al leer el PDF", None, None
        if not texto_completo.strip():
            # Avoid reporting an empty ZIP as a success.
            return "❌ El PDF no contiene texto", None, None

        # A slider may deliver a float; range() and slicing need an int >= 1.
        tamano_bloque = max(1, int(paginas_por_bloque))
        paginas = texto_completo.split("\f")
        bloques = [
            " ".join(paginas[i:i + tamano_bloque])
            for i in range(0, len(paginas), tamano_bloque)
        ]

        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
            zip_path = tmp_zip.name
        try:
            tts_local = cargar_modelo()
            with zipfile.ZipFile(zip_path, 'w') as zipf:
                for i, bloque in enumerate(bloques, start=1):
                    fragmentos = dividir_en_fragmentos(bloque)
                    for j, frag in enumerate(fragmentos, start=1):
                        if not frag.strip():
                            continue
                        print(f"🗣️ Generando bloque {i}, fragmento {j}...")
                        ruta_wav = _sintetizar_fragmento(tts_local, frag)
                        try:
                            zipf.write(ruta_wav, f"bloque_{i}_frag_{j}.wav")
                        finally:
                            # The WAV is only needed until it is in the archive.
                            os.remove(ruta_wav)
        except Exception as e:
            # Same error contract as the text branch; drop the partial archive.
            if os.path.exists(zip_path):
                os.remove(zip_path)
            return f"❌ Error: {str(e)}", None, None
        return "✅ ZIP generado", None, zip_path

    if not texto or not texto.strip():
        return "❌ El texto está vacío", None, None

    fragmentos = dividir_en_fragmentos(texto)
    audios_temp = []
    try:
        tts_local = cargar_modelo()
        for idx, frag in enumerate(fragmentos, start=1):
            print(f"🗣️ Generando fragmento {idx}/{len(fragmentos)}...")
            audios_temp.append(_sintetizar_fragmento(tts_local, frag))
        audio_final = unir_audios(audios_temp)
        return "✅ Audio generado", audio_final, None
    except Exception as e:
        # Remove whatever partial audio is still on disk.
        for f in audios_temp:
            if os.path.exists(f):
                os.remove(f)
        return f"❌ Error: {str(e)}", None, None
def procesar(texto, archivo_pdf, paginas_por_bloque):
    """Click handler: run the synthesis and translate its result into UI updates.

    Returns the status message plus one update for the audio player and one
    for the ZIP download; each component is shown only when its value exists.
    """
    time.sleep(0.1)
    mensaje, ruta_audio, ruta_zip = leer_con_voz(texto, archivo_pdf, paginas_por_bloque)
    actualizacion_audio = gr.update(value=ruta_audio, visible=ruta_audio is not None)
    actualizacion_zip = gr.update(value=ruta_zip, visible=ruta_zip is not None)
    return (mensaje, actualizacion_audio, actualizacion_zip)


# NOTE(review): the original indentation was lost; the nesting below (text box
# and PDF upload side by side in the row, everything else directly under the
# Blocks container) is a reconstruction -- confirm against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("## 📖 Lector con voz personalizada (Oscar)")
    gr.Markdown("Sube un PDF o escribe texto. Se convertirá a audio usando tu voz.")

    with gr.Row():
        texto_input = gr.Textbox(lines=10, label="Texto (opcional)")
        pdf_input = gr.File(label="PDF (opcional)", file_types=[".pdf"])

    paginas_slider = gr.Slider(1, 10, value=2, label="Páginas por bloque (si usas PDF)")
    estado_output = gr.Textbox(label="Estado", value="Esperando acción...")
    # Both result widgets start hidden; procesar reveals the one that applies.
    audio_output = gr.Audio(label="Audio generado", visible=False)
    zip_output = gr.File(label="ZIP de audios", visible=False)
    boton_generar = gr.Button("🎿 Generar audio")

    entradas = [texto_input, pdf_input, paginas_slider]
    salidas = [estado_output, audio_output, zip_output]
    boton_generar.click(fn=procesar, inputs=entradas, outputs=salidas)

app.launch()