Spaces:
Configuration error
Configuration error
File size: 1,981 Bytes
66e2a44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
#!/usr/bin/env python3
"""
Script para gerar audio com StyleTTS2 e clonagem de voz.
Uso: python generate_audio.py --text "Seu texto aqui" --voice voice_ref.wav --output audio.wav
"""
import argparse
import os
# Workaround for PyTorch 2.6+, where torch.load defaults to weights_only=True
import torch
original_load = torch.load


def patched_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Every positional and keyword argument is forwarded unchanged, except
    that any ``weights_only`` value supplied by the caller is overridden.

    SECURITY: with ``weights_only=False`` the checkpoint is fully unpickled,
    which can execute arbitrary code. Only load checkpoints from sources
    you trust.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return original_load(*args, **forced_kwargs)


# Install the wrapper globally so every later torch.load call goes through it.
torch.load = patched_load
def generate_audio(text: str, output_path: str, voice_ref: str = None, diffusion_steps: int = 10):
    """Synthesize *text* with StyleTTS2 and write the result to a WAV file.

    Args:
        text: Text to convert to speech.
        output_path: Destination WAV file. Its parent directory is created
            if it does not exist yet.
        voice_ref: Optional path to a reference recording used for voice
            cloning. When it is ``None``, empty, or does not point to an
            existing path, the default voice is used and (if a path was
            given) a warning is printed.
        diffusion_steps: Value forwarded to ``StyleTTS2.inference`` as
            ``diffusion_steps``.

    Returns:
        ``output_path``, unchanged.
    """
    # Create the output directory before the expensive model load, so a bad
    # destination does not surface only after inference has finished.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    print("Carregando StyleTTS2...")
    # Imported lazily so the module can be imported without loading the model stack.
    from styletts2 import tts

    my_tts = tts.StyleTTS2()

    # Decide once whether the reference voice is usable, so the log below
    # reports the voice that is actually used instead of a missing path.
    use_reference = bool(voice_ref) and os.path.exists(voice_ref)

    print("Gerando audio...")
    print(f" Texto: {text[:50]}...")
    print(f" Voz de referencia: {voice_ref if use_reference else 'padrao'}")
    if voice_ref and not use_reference:
        print(f" Aviso: arquivo de voz nao encontrado: {voice_ref}; usando voz padrao")

    inference_kwargs = {"diffusion_steps": diffusion_steps}
    if use_reference:
        inference_kwargs["target_voice_path"] = voice_ref
    wav = my_tts.inference(text, **inference_kwargs)

    # Save the audio.
    import scipy.io.wavfile as wavfile

    # NOTE(review): 24000 is presumably the model's output sample rate in Hz
    # -- confirm against the styletts2 package.
    wavfile.write(output_path, 24000, wav)
    print(f"Audio salvo em: {output_path}")
    return output_path
def main():
    """Parse the command-line options and run a single synthesis.

    Options: ``--text/-t`` (required), ``--output/-o`` (default
    ``output.wav``), ``--voice/-v`` (optional reference WAV) and
    ``--steps/-s`` (int, default 10). The parsed values are passed to
    ``generate_audio``.
    """
    # (flags, add_argument keyword settings), in the order they appear in --help.
    option_specs = (
        (('--text', '-t'), {'required': True, 'help': 'Texto para converter em audio'}),
        (('--output', '-o'), {'default': 'output.wav', 'help': 'Arquivo de saida'}),
        (('--voice', '-v'), {'help': 'Arquivo WAV de referencia para clonagem de voz'}),
        (
            ('--steps', '-s'),
            {'type': int, 'default': 10, 'help': 'Passos de difusao (mais = melhor qualidade)'},
        ),
    )

    cli = argparse.ArgumentParser(description='Gerar audio com StyleTTS2')
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    generate_audio(
        text=opts.text,
        output_path=opts.output,
        voice_ref=opts.voice,
        diffusion_steps=opts.steps,
    )
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|