File size: 1,176 Bytes
dbee7f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os

import torch
from transformers import VitsModel, AutoTokenizer

class TTSManager:
    """Arabic text-to-speech built on Meta's MMS-TTS VITS checkpoint.

    Loads the model and tokenizer once at construction and synthesizes a
    WAV file into ``output_dir`` on each :meth:`tts` call.
    """

    def __init__(self, output_dir, use_cuda_if_available=True):
        """Load the Arabic MMS-TTS model.

        Args:
            output_dir: Directory where synthesized WAV files are written.
                Created if it does not already exist.
            use_cuda_if_available: Place the model on CUDA when a GPU is
                present; otherwise fall back to CPU.
        """
        self.output_dir = output_dir
        self.device = "cuda" if use_cuda_if_available and torch.cuda.is_available() else "cpu"

        # Load a professional VITS model for Arabic
        self.model_name = "facebook/mms-tts-ara"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = VitsModel.from_pretrained(self.model_name).to(self.device)

        # FIX: ensure the target directory exists before the first synthesis;
        # wavfile.write does not create parent directories.
        os.makedirs(self.output_dir, exist_ok=True)

    def tts(self, text, rate=1.0, denoise=0.01):
        """Synthesize *text* and save it to ``<output_dir>/output.wav``.

        Args:
            text: Arabic text to synthesize.
            rate: Speaking-rate multiplier (1.0 = model default; >1 faster).
            denoise: VITS ``noise_scale``; lower values yield cleaner but
                flatter-sounding audio.

        Returns:
            dict with an ``audio_url`` key pointing at the served file.
        """
        # FIX: `rate` and `denoise` were previously accepted but silently
        # ignored. HF VITS reads these as runtime attributes on the model.
        self.model.speaking_rate = rate
        self.model.noise_scale = denoise

        # 1. Tokenize the text and move tensors to the model's device.
        inputs = self.tokenizer(text, return_tensors="pt").to(self.device)

        # 2. Generate audio (inference only; no gradients needed).
        with torch.no_grad():
            output = self.model(**inputs).waveform

        # 3. Save to a file.
        import scipy.io.wavfile as wavfile
        output_path = os.path.join(self.output_dir, "output.wav")
        # Model emits a float32 waveform; squeeze drops the batch dimension.
        audio_data = output.cpu().numpy().squeeze()
        wavfile.write(output_path, self.model.config.sampling_rate, audio_data)

        # NOTE(review): URL is hard-coded and assumes output_dir is served
        # under /static — confirm this matches the web server's mapping.
        return {"audio_url": "/static/output.wav"}