```python
# tts_utils.py
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer


# Updated load_model function in tts_utils.py
def load_model():
    model = ParlerTTSForConditionalGeneration.from_pretrained(
        "ai4bharat/indic-parler-tts",
        torch_dtype=torch.float32  # Force CPU-compatible dtype
    )
    # Apply dynamic quantization to Linear layers
    quantized_model = torch.ao.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear},  # Target layer type
        dtype=torch.qint8
    )
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
    # The voice description is tokenized with the text encoder's tokenizer,
    # as in the indic-parler-tts model card.
    description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder._name_or_path)
    return quantized_model, tokenizer, description_tokenizer

def generate_speech(text, voice_prompt, model, tokenizer, description_tokenizer):
    # Dynamically quantized (qint8) models only run on CPU, so keep everything on CPU.
    device = "cpu"
    model = model.to(device)

    description_input_ids = description_tokenizer(
        voice_prompt,
        return_tensors="pt"
    ).to(device)
    prompt_input_ids = tokenizer(text, return_tensors="pt").to(device)

    generation = model.generate(
        input_ids=description_input_ids.input_ids,
        attention_mask=description_input_ids.attention_mask,
        prompt_input_ids=prompt_input_ids.input_ids,
        prompt_attention_mask=prompt_input_ids.attention_mask,
        max_new_tokens=1024
    )
    # Return the waveform as a 1-D NumPy array.
    return generation.cpu().numpy().squeeze()
```
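A minimal usage sketch follows; it is illustrative rather than part of the repository. The example text, voice description, output filename, and the `soundfile` dependency are assumptions, and the sampling rate is read from `model.config.sampling_rate` as in the standard Parler-TTS examples.

```python
# usage sketch -- inputs and filenames below are illustrative assumptions
import soundfile as sf  # assumed available for writing the WAV file

from tts_utils import load_model, generate_speech

model, tokenizer, description_tokenizer = load_model()

text = "Welcome to the text-to-speech demo."  # any language the model supports
voice_prompt = "A female speaker with a clear voice, speaking at a moderate pace."

audio = generate_speech(text, voice_prompt, model, tokenizer, description_tokenizer)

# Parler-TTS exposes the output sampling rate on the model config.
sf.write("output.wav", audio, model.config.sampling_rate)
```

Dynamic int8 quantization of the Linear layers shrinks the model's memory footprint and can speed up inference, at the cost of some output quality, which is the usual trade-off when serving a model like this on CPU-only hardware.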