Spaces:

E-motionAssistant
/

Space2

Sleeping

File size: 2,030 Bytes

1404207
754e278
c36673e
1404207
433a1d8
1404207
50aeec7
3c0704f
c36673e
50aeec7
1404207
 
 
c36673e
 
50aeec7
c36673e
 
 
 
433a1d8
c36673e
 
 
253b843
c36673e
 
 
 
 
 
 
 
433a1d8
c36673e
1404207
253b843
433a1d8
50aeec7
c36673e
433a1d8
c36673e
50aeec7
433a1d8
50aeec7
754e278
433a1d8
c36673e
 
0861d22
433a1d8
754e278
0861d22
433a1d8
1404207
433a1d8
754e278
 
3a7b95c
1404207
 
433a1d8

import os
import tempfile

import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from TTS.api import TTS

def load_eng_model():
    """Download the English VITS checkpoint, fix its embedding size, and load it.

    The checkpoint's ``text_encoder.emb.weight`` table is cut from 137 rows to
    its first 131 rows when (and only when) it has exactly 137 rows; the
    patched checkpoint is then written to ``fixed_model.pth`` in the current
    working directory and loaded from there. A checkpoint with any other row
    count is loaded straight from the downloaded file, without writing a copy.

    Returns:
        A ``TTS`` instance created with ``gpu=False``.

    Raises:
        KeyError: If the checkpoint lacks the ``model`` entry or the
            ``text_encoder.emb.weight`` tensor inside it.

    Errors from the download, ``torch.load``/``torch.save`` or the ``TTS``
    constructor are not handled here and propagate to the caller.
    """
    repo_id = "E-motionAssistant/text-to-speech-VITS-english"
    embedding_key = "text_encoder.emb.weight"
    # Row counts before/after the fix.
    # NOTE(review): dropping the *last* six rows assumes the surplus symbols
    # sit at the end of the embedding table -- confirm against the config.
    source_rows, target_rows = 137, 131
    print("--- Starting Weights Surgery ---")

    model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

    # 1. Load the checkpoint directly into PyTorch.
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code -- only acceptable because the repository is trusted.
    checkpoint = torch.load(model_path, map_location="cpu")

    try:
        raw_weights = checkpoint["model"][embedding_key]
    except KeyError as err:
        # Same exception type as before, but with a message that says what
        # was being looked for instead of just the bare key.
        raise KeyError(
            f"checkpoint is missing {err}; cannot resize '{embedding_key}'"
        ) from err
    print(f"Original weight shape: {raw_weights.shape}")

    if raw_weights.shape[0] != source_rows:
        # Nothing to patch: skip re-serializing the whole checkpoint and load
        # the downloaded file as-is.
        return TTS(model_path=model_path, config_path=config_path, gpu=False)

    # 2. Shrink the embedding table to the expected number of rows.
    print(f"Trimming {source_rows} -> {target_rows}...")
    # A slice is a view onto the original storage, and torch.save writes the
    # whole storage for a view; clone() so only the kept rows are stored.
    checkpoint["model"][embedding_key] = raw_weights[:target_rows, :].clone()

    # 3. Save the patched checkpoint to a new file.
    fixed_model_path = os.path.join(os.getcwd(), "fixed_model.pth")
    torch.save(checkpoint, fixed_model_path)
    print("Surgery complete. Fixed model saved.")

    # 4. Load the patched checkpoint through the standard TTS API.
    return TTS(model_path=fixed_model_path, config_path=config_path, gpu=False)

# --- Initialization ---
# Build the model once at import time. Any failure is reported on stdout and
# leaves ``eng_tts`` as None so the rest of the module can still be loaded.
try:
    eng_tts = load_eng_model()
    print("--- SUCCESS: SURGERY WORKED, SYSTEM ONLINE ---")
except Exception as startup_error:
    eng_tts = None
    print(f"CRITICAL ERROR: {startup_error}")

def generate_voice(text):
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: The text to speak. It is converted with ``str()`` before being
            passed to the model.

    Returns:
        The path of the generated ``.wav`` file, or ``None`` when *text* is
        ``None`` or blank, when the model failed to load (``eng_tts`` is
        falsy), or when synthesis raises.
    """
    # Reject missing input first: str(None) would otherwise be spoken as the
    # word "None", and blank text gives the model nothing to synthesize.
    if text is None:
        return None
    spoken_text = str(text)
    if not spoken_text.strip():
        return None

    if not eng_tts:
        return None

    # Use a unique file per call so overlapping requests cannot overwrite each
    # other's audio. The file is left in place for the caller to consume.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; the TTS call writes the file itself

    try:
        eng_tts.tts_to_file(text=spoken_text, file_path=output_path)
    except Exception as e:
        print(f"Synthesis Error: {e}")
        # Best-effort removal of the empty placeholder created above.
        try:
            os.remove(output_path)
        except OSError:
            pass
        return None
    return output_path

# Single-textbox UI: the entered text goes to generate_voice, and the file
# path it returns is handed to the audio output component.
demo = gr.Interface(
    title="English TTS",
    fn=generate_voice,
    inputs=gr.Textbox(label="English Text"),
    outputs=gr.Audio(type="filepath", label="Result"),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()