Spaces:
Sleeping
Sleeping
File size: 2,030 Bytes
1404207 754e278 c36673e 1404207 433a1d8 1404207 50aeec7 3c0704f c36673e 50aeec7 1404207 c36673e 50aeec7 c36673e 433a1d8 c36673e 253b843 c36673e 433a1d8 c36673e 1404207 253b843 433a1d8 50aeec7 c36673e 433a1d8 c36673e 50aeec7 433a1d8 50aeec7 754e278 433a1d8 c36673e 0861d22 433a1d8 754e278 0861d22 433a1d8 1404207 433a1d8 754e278 3a7b95c 1404207 433a1d8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | import gradio as gr
import torch
from TTS.api import TTS
from huggingface_hub import hf_hub_download
import os
def load_eng_model():
    """Download the English VITS checkpoint, patch its embedding, and load it.

    Steps performed:
      1. Fetch ``best_model.pth`` and ``config.json`` from the Hugging Face
         Hub repository ``E-motionAssistant/text-to-speech-VITS-english``.
      2. Load the checkpoint onto the CPU and, when the text-encoder
         embedding table has exactly 137 rows, keep only the first 131.
      3. Write the (possibly patched) checkpoint to ``fixed_model.pth`` in
         the current working directory.
      4. Build a ``TTS`` object from the patched file and the config.

    Returns:
        The ``TTS`` instance built from the patched checkpoint (``gpu=False``).

    Raises:
        Nothing is caught here: download, deserialization, missing-key and
        model-construction errors all propagate to the caller.
    """
    repo_id = "E-motionAssistant/text-to-speech-VITS-english"
    emb_key = "text_encoder.emb.weight"

    print("--- Starting Weights Surgery ---")
    model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

    # 1. Load the checkpoint directly into PyTorch.
    # NOTE(review): torch.load unpickles the downloaded file; only point this
    # at a repository you trust.
    checkpoint = torch.load(model_path, map_location="cpu")

    # 2. Shrink the embedding table from 137 rows down to 131.
    # The original author's intent is to avoid a size-mismatch error when the
    # checkpoint is loaded against the config -- TODO confirm 131 is the
    # vocabulary size the config expects.
    raw_weights = checkpoint["model"][emb_key]
    print(f"Original weight shape: {raw_weights.shape}")
    if raw_weights.shape[0] == 137:
        print("Trimming 137 -> 131...")
        # clone() detaches the slice from the 137-row storage; torch.save
        # writes a tensor's whole underlying storage, so saving the bare view
        # would still serialize the rows that were meant to be dropped.
        checkpoint["model"][emb_key] = raw_weights[:131, :].clone()

    # 3. Save the patched checkpoint to a new file next to the app.
    fixed_model_path = os.path.join(os.getcwd(), "fixed_model.pth")
    torch.save(checkpoint, fixed_model_path)
    print("Surgery complete. Fixed model saved.")

    # 4. Load through the standard TTS library, CPU only.
    tts = TTS(model_path=fixed_model_path, config_path=config_path, gpu=False)
    return tts
# --- Initialization ---
# Load the model once at import time. Any failure is reported on stdout and
# leaves eng_tts as None, so the module still imports and the UI can start.
try:
    eng_tts = load_eng_model()
    print("--- SUCCESS: SURGERY WORKED, SYSTEM ONLINE ---")
except Exception as load_error:
    print(f"CRITICAL ERROR: {load_error}")
    eng_tts = None
def generate_voice(text):
    """Synthesize *text* to ``output.wav`` and return that file path.

    Args:
        text: The text to speak. It is converted with ``str()`` before being
            passed to the model.

    Returns:
        The string ``"output.wav"`` on success, or ``None`` when there is
        nothing to speak (``None``, empty or whitespace-only input), when the
        model failed to load (``eng_tts`` is falsy), or when synthesis raises.
    """
    # Nothing to speak. Without this guard ``str(None)`` would be synthesized
    # as the literal word "None", and blank input would reach the model.
    if text is None or not str(text).strip():
        return None

    # Model failed to load at startup; there is nothing to synthesize with.
    if not eng_tts:
        return None

    try:
        # Fixed file name: every call overwrites the previous result.
        output_path = "output.wav"
        eng_tts.tts_to_file(text=str(text), file_path=output_path)
        return output_path
    except Exception as e:
        # Best-effort UI handler: report the failure and return no audio
        # rather than surfacing the exception to the caller.
        print(f"Synthesis Error: {e}")
        return None
# Single-textbox UI: the entered text is passed to generate_voice and the
# file path it returns is handed to the audio component.
demo = gr.Interface(
    fn=generate_voice,
    inputs=gr.Textbox(label="English Text"),
    outputs=gr.Audio(label="Result", type="filepath"),
    title="English TTS",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()