rasenganai committed on
Commit
8f40e60
·
verified ·
1 Parent(s): bc7830e

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -67
app.py DELETED
@@ -1,67 +0,0 @@
"""Gradio demo for MAHATTSv2 speech generation.

At import time this script creates the output directory, picks a torch
device, calls ``load_cfm`` / ``load_t2s_model`` on the configured checkpoints,
builds a ``gr.Interface`` around :func:`generate_audio`, and launches it.
"""
import os
import sys

import gradio as gr
import torch
from tqdm import tqdm

# sys.path.append("Testing/")
from inference import infer, prepare_inputs, load_t2s_model, load_cfm, create_wav_header

# Setup
os.makedirs("generated_samples/", exist_ok=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device", device)

# Model checkpoints
m1_checkpoint = "pretrained_checkpoint/m1_gemma_benchmark_1_latest_weights.pt"
m2_checkpoint = "pretrained_checkpoint/m2.pt"
vocoder_checkpoint = 'pretrained_checkpoint/700_580k_multilingual_infer_ready/'

# Load models. These are plain module-level names, so no `global` statement is
# needed (at module scope it has no effect); functions below only read them.
FM, vocoder, m2, mu, std = load_cfm(m2_checkpoint, vocoder_checkpoint, device)
m1 = load_t2s_model(m1_checkpoint, device)

# Speaker reference clips: display name -> list of reference .wav paths.
speaker_refs = {
    "Speaker1": [
        "speakers/female1/train_hindifemale_02794.wav",
        "speakers/female1/train_hindifemale_04167.wav",
        "speakers/female1/train_hindifemale_02795.wav",
    ]
}

# Available languages (can be extended)
available_languages = ["hindi"]

# Sample rate handed to gr.Audio together with the generated audio.
# NOTE(review): presumably the vocoder's output rate -- confirm against the
# inference module before changing.
SAMPLE_RATE = 24000


# Inference function
def generate_audio(text, speaker_name, language):
    """Generate speech for ``text`` using the reference clips of a speaker.

    Args:
        text: Input text; it is lower-cased before being handed to
            ``prepare_inputs``.
        speaker_name: Key into ``speaker_refs``. May be ``None`` when the
            dropdown was left empty.
        language: Language value forwarded unchanged to ``prepare_inputs``.

    Returns:
        A ``(SAMPLE_RATE, audio_wav)`` tuple for the ``gr.Audio`` output, where
        ``audio_wav`` is whatever ``infer`` returns (presumably a waveform
        array -- TODO confirm).

    Raises:
        gr.Error: If ``speaker_name`` has no entry in ``speaker_refs``.
    """
    if speaker_name not in speaker_refs:
        # The interface has a single gr.Audio output, so returning an
        # (error string, None) pair would be read as (sample_rate, data) and
        # fail inside the component. Raising gr.Error shows the message to the
        # user instead.
        raise gr.Error(f"Reference clips not available for {speaker_name}")

    ref_clips = speaker_refs[speaker_name]

    # The same reference clips are used for both the m1 and m2 inputs.
    text_ids, code_ids, language_code, ref_mels_m1, ref_mels_m2 = prepare_inputs(
        text.lower(),
        ref_clips_m1=ref_clips,
        ref_clips_m2=ref_clips,
        language=language,
        device=device,
    )

    audio_wav = infer(
        m1, m2, vocoder, FM, mu, std,
        text_ids, code_ids, language_code,
        ref_mels_m1, ref_mels_m2, device,
    )
    return SAMPLE_RATE, audio_wav


# Gradio UI
interface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=list(speaker_refs.keys()), label="Select Speaker"),
        gr.Dropdown(choices=available_languages, label="Select Language"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="MAHATTSv2 Demo",
    description="Enter text, choose a speaker and language to generate speech.",
)

interface.launch(share=True, server_port=9999)