Spaces:

deidank
/

TTS_with_Oscar

Build error

App Files Files Community

TTS_with_Oscar / app.py

deidank

Update app.py

eedce0f verified 10 months ago

raw

history blame contribute delete

5.36 kB

	import os
	import tempfile
	from TTS.api import TTS as TTSClass
	from PyPDF2 import PdfReader
	import gradio as gr
	import torch
	import textwrap
	import time
	import zipfile
	from pydub import AudioSegment

	# Registro seguro compatible con PyTorch 2.6+
	from TTS.tts.configs.xtts_config import XttsConfig, XttsAudioConfig
	from TTS.tts.models.xtts import XttsArgs
	from TTS.config.shared_configs import BaseDatasetConfig

	# Allow-list the XTTS configuration classes so that torch.load() with
	# weights_only=True (the default since PyTorch 2.6) can unpickle the
	# checkpoint. The documented argument is a list, and the hasattr guard keeps
	# the module importable on PyTorch releases that predate add_safe_globals.
	if hasattr(torch.serialization, "add_safe_globals"):
	    torch.serialization.add_safe_globals([
	        XttsConfig,
	        XttsAudioConfig,
	        XttsArgs,
	        BaseDatasetConfig,
	    ])

	# NOTE(review): presumably lets the Coqui model download proceed without an
	# interactive terms-of-service prompt -- confirm against the TTS package.
	os.environ["COQUI_TOS_AGREED"] = "1"

	# Reference voice sample passed as speaker_wav to every synthesis call.
	voz_path = "Oscar.wav"
	# Module-level placeholder; no code visible in this file reads or assigns it.
	tts = None

	def cargar_modelo():
	    """Build and return a new XTTS v2 synthesizer with the GPU disabled.

	    A fresh ``TTSClass`` instance is created on every call; nothing is cached.
	    """
	    print("Cargando modelo TTS desde cero...")
	    nombre_modelo = "tts_models/multilingual/multi-dataset/xtts_v2"
	    sintetizador = TTSClass(model_name=nombre_modelo, gpu=False)
	    return sintetizador

	def extraer_texto_pdf(ruta_pdf):
	    """Extract the text of every page of a PDF as a single string.

	    Pages are joined with a form feed ("\\f") so that page boundaries survive
	    in the result: words at the end of one page no longer fuse with the first
	    word of the next, and a caller can recover the pages with ``split("\\f")``.

	    Args:
	        ruta_pdf: Path (or file-like object) accepted by ``PdfReader``.

	    Returns:
	        The extracted text, with pages that yield no text contributing an
	        empty string, or ``None`` if the PDF could not be read.
	    """
	    try:
	        reader = PdfReader(ruta_pdf)
	        # extract_text() may return None for pages without a text layer.
	        return "\f".join(pagina.extract_text() or "" for pagina in reader.pages)
	    except Exception as e:
	        # Best-effort boundary: report the problem and let the caller decide.
	        print(f"❌ Error al extraer texto del PDF: {e}")
	        return None

	def dividir_en_fragmentos(texto, max_len=239):
	    """Split ``texto`` into whitespace-wrapped fragments of at most ``max_len`` chars.

	    Words are never cut, neither in the middle nor at hyphens, so a single
	    word longer than ``max_len`` is returned as its own over-long fragment.
	    Returns a list of strings (empty for empty or whitespace-only input).
	    """
	    envoltorio = textwrap.TextWrapper(
	        width=max_len,
	        break_long_words=False,
	        break_on_hyphens=False,
	    )
	    return envoltorio.wrap(texto)

	def unir_audios(lista_rutas):
	    """Concatenate WAV files, in order, into one new temporary WAV file.

	    Args:
	        lista_rutas: Paths of the WAV files to join. They are deleted once the
	            combined file has been written successfully.

	    Returns:
	        Path of the combined WAV file. It is created with ``delete=False``, so
	        it stays on disk after this function returns.

	    Raises:
	        ValueError: If ``lista_rutas`` is empty (there is nothing to export).
	    """
	    rutas = list(lista_rutas)  # iterated twice below; tolerate one-shot iterables
	    if not rutas:
	        raise ValueError("No hay audios para unir")

	    combined = None
	    for wav_file in rutas:
	        seg = AudioSegment.from_wav(wav_file)
	        combined = seg if combined is None else combined + seg

	    # Reserve the output path and close the handle before exporting to it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        out_file = tmp.name
	    try:
	        combined.export(out_file, format="wav")
	    except Exception:
	        # Do not leave a half-written output behind; inputs are kept intact.
	        if os.path.exists(out_file):
	            os.remove(out_file)
	        raise

	    for f in rutas:
	        os.remove(f)
	    return out_file

	def _ruta_wav_temporal():
	    """Reserve a temporary ``.wav`` path and return it with its handle closed."""
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        return tmp.name


	def _borrar_si_existe(ruta):
	    """Delete ``ruta`` if it is set and still present on disk."""
	    if ruta and os.path.exists(ruta):
	        os.remove(ruta)


	def _leer_pdf(archivo_pdf, paginas_por_bloque):
	    """Synthesize a PDF into a ZIP with one WAV per text fragment."""
	    # Depending on the Gradio version / ``type`` setting, the upload arrives
	    # either as a path string or as an object exposing the path as ``.name``.
	    ruta_pdf = getattr(archivo_pdf, "name", archivo_pdf)
	    texto_completo = extraer_texto_pdf(ruta_pdf)
	    if texto_completo is None:
	        return "❌ Error al leer el PDF", None, None
	    if not texto_completo.strip():
	        # Avoid loading the model just to hand back an empty ZIP as "success".
	        return "❌ El PDF no contiene texto", None, None

	    # A slider may deliver a float; range() and slicing need an int >= 1.
	    por_bloque = max(1, int(paginas_por_bloque))
	    paginas = texto_completo.split("\f")
	    bloques = [
	        " ".join(paginas[i:i + por_bloque])
	        for i in range(0, len(paginas), por_bloque)
	    ]

	    zip_path = None
	    try:
	        tts_local = cargar_modelo()
	        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
	            zip_path = tmp_zip.name
	        with zipfile.ZipFile(zip_path, "w") as zipf:
	            for i, bloque in enumerate(bloques, start=1):
	                for j, frag in enumerate(dividir_en_fragmentos(bloque), start=1):
	                    if not frag.strip():
	                        continue
	                    print(f"🗣️ Generando bloque {i}, fragmento {j}...")
	                    ruta_wav = _ruta_wav_temporal()
	                    try:
	                        tts_local.tts_to_file(
	                            text=frag,
	                            file_path=ruta_wav,
	                            speaker_wav=voz_path,
	                            language="es",
	                        )
	                        zipf.write(ruta_wav, f"bloque_{i}_frag_{j}.wav")
	                    finally:
	                        # The WAV is only needed until it is inside the ZIP.
	                        _borrar_si_existe(ruta_wav)
	    except Exception as e:
	        # Same reporting style as the plain-text path; drop the partial ZIP.
	        _borrar_si_existe(zip_path)
	        return f"❌ Error: {str(e)}", None, None
	    return "✅ ZIP generado", None, zip_path


	def _leer_texto(texto):
	    """Synthesize free text into a single concatenated WAV file."""
	    if not texto or not texto.strip():
	        return "❌ El texto está vacío", None, None

	    fragmentos = dividir_en_fragmentos(texto)
	    audios_temp = []
	    try:
	        tts_local = cargar_modelo()
	        for idx, frag in enumerate(fragmentos, start=1):
	            print(f"🗣️ Generando fragmento {idx}/{len(fragmentos)}...")
	            ruta_wav = _ruta_wav_temporal()
	            # Register the path first so a failed synthesis is still cleaned up.
	            audios_temp.append(ruta_wav)
	            tts_local.tts_to_file(
	                text=frag,
	                file_path=ruta_wav,
	                speaker_wav=voz_path,
	                language="es",
	            )

	        audio_final = unir_audios(audios_temp)
	        return "✅ Audio generado", audio_final, None

	    except Exception as e:
	        for f in audios_temp:
	            _borrar_si_existe(f)
	        return f"❌ Error: {str(e)}", None, None


	def leer_con_voz(texto, archivo_pdf, paginas_por_bloque):
	    """Convert a PDF or free text to speech using the reference voice.

	    Cheap validation runs before the TTS model is loaded, so invalid requests
	    fail fast. A PDF, when given, takes precedence over ``texto``.

	    Args:
	        texto: Text to read aloud; used only when no PDF is supplied.
	        archivo_pdf: Uploaded PDF, as a path string or an object whose
	            ``.name`` is the path; falsy when no file was uploaded.
	        paginas_por_bloque: Number of form-feed-separated pages grouped into
	            each block; coerced to an integer of at least 1.

	    Returns:
	        A ``(status, audio_path, zip_path)`` tuple. The text path fills
	        ``audio_path``, the PDF path fills ``zip_path``, and on failure both
	        are ``None`` and ``status`` carries the error message.
	    """
	    if not os.path.exists(voz_path):
	        return "❌ No se encontró el archivo de voz Oscar.wav", None, None

	    if archivo_pdf:
	        return _leer_pdf(archivo_pdf, paginas_por_bloque)

	    return _leer_texto(texto)

	# Gradio UI: text box and PDF upload as inputs; status line, audio player and
	# ZIP download as outputs. The two result widgets stay hidden until filled.
	with gr.Blocks() as app:
	    gr.Markdown("## 📖 Lector con voz personalizada (Oscar)")
	    gr.Markdown("Sube un PDF o escribe texto. Se convertirá a audio usando tu voz.")

	    with gr.Row():
	        texto_input = gr.Textbox(lines=10, label="Texto (opcional)")
	        pdf_input = gr.File(label="PDF (opcional)", file_types=[".pdf"])

	    # step=1: a page count must be a whole number. Without an explicit step
	    # Gradio derives a fractional one from the 1-10 range.
	    paginas_slider = gr.Slider(1, 10, value=2, step=1, label="Páginas por bloque (si usas PDF)")
	    estado_output = gr.Textbox(label="Estado", value="Esperando acción...")
	    audio_output = gr.Audio(label="Audio generado", visible=False)
	    zip_output = gr.File(label="ZIP de audios", visible=False)
	    boton_generar = gr.Button("🎿 Generar audio")

	    def procesar(texto, archivo_pdf, paginas_por_bloque):
	        """Run the synthesis and map its result onto the three output widgets.

	        Returns the status text plus updates that set and reveal the audio
	        player or the ZIP download only when the corresponding path exists.
	        """
	        # Coerce defensively: the slider value can arrive as a float.
	        estado, audio, zip_file = leer_con_voz(
	            texto, archivo_pdf, int(paginas_por_bloque)
	        )
	        return (
	            estado,
	            gr.update(value=audio, visible=audio is not None),
	            gr.update(value=zip_file, visible=zip_file is not None),
	        )

	    boton_generar.click(
	        fn=procesar,
	        inputs=[texto_input, pdf_input, paginas_slider],
	        outputs=[estado_output, audio_output, zip_output],
	    )

	app.launch()