midhyaraj committed on
Commit
8eab11b
·
verified ·
1 Parent(s): c3d1a4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -4
app.py CHANGED
@@ -15,7 +15,7 @@ def setup_environment():
15
  subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
16
 
17
  # Install the package using setup.py
18
- subprocess.run([sys.executable, "setup.py", "install"], check=True) # Make sure this line is included
19
 
20
  # Install Gradio
21
  subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)
@@ -46,16 +46,90 @@ def main():
46
  ]
47
 
48
  def inference(text, emotion, prompt, voice, mic_audio, voice_b, voice_c, preset, seed):
49
- # Your inference function implementation here...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  # Create the Gradio interface
52
  interface = gr.Interface(
53
  fn=inference,
54
  inputs=[
55
- # Define your inputs here...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  ],
57
  outputs=[
58
- # Define your outputs here...
 
 
 
59
  ],
60
  title="RJ VOICE CLONING",
61
  description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",
 
15
  subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
16
 
17
  # Install the package using setup.py
18
+ subprocess.run([sys.executable, "setup.py", "install"], check=True)
19
 
20
  # Install Gradio
21
  subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)
 
46
  ]
47
 
48
  def inference(text, emotion, prompt, voice, mic_audio, voice_b, voice_c, preset, seed):
49
+ if voice != "custom_voice":
50
+ voices = [voice]
51
+ else:
52
+ voices = []
53
+
54
+ if voice_b != "disabled":
55
+ voices.append(voice_b)
56
+ if voice_c != "disabled":
57
+ voices.append(voice_c)
58
+
59
+ if emotion != "None/Custom":
60
+ text = f"[I am really {emotion.lower()},] {text}"
61
+ elif prompt.strip() != "":
62
+ text = f"[{prompt},] {text}"
63
+
64
+ c = None
65
+ if voice == "custom_voice":
66
+ if mic_audio is None:
67
+ raise gr.Error("Please provide audio from mic when choosing custom voice")
68
+ c = torchaudio.load(mic_audio)[0] # Use torchaudio to load audio
69
+
70
+ if len(voices) == 1 or len(voices) == 0:
71
+ if voice == "custom_voice":
72
+ voice_samples, conditioning_latents = [c], None
73
+ else:
74
+ voice_samples, conditioning_latents = tts.load_voice(voice) # Ensure to call TTS method
75
+ else:
76
+ voice_samples, conditioning_latents = tts.load_voices(voices)
77
+ if voice == "custom_voice":
78
+ voice_samples.append(c)
79
+
80
+ sample_voice = voice_samples[0] if len(voice_samples) else None
81
+
82
+ start_time = time.time()
83
+ gen, _ = tts.tts_with_preset(
84
+ text,
85
+ voice_samples=voice_samples,
86
+ conditioning_latents=conditioning_latents,
87
+ preset=preset,
88
+ use_deterministic_seed=seed,
89
+ return_deterministic_state=True,
90
+ k=3,
91
+ )
92
+
93
+ return (
94
+ (22050, sample_voice.squeeze().cpu().numpy()),
95
+ (24000, gen[0].squeeze().cpu().numpy()),
96
+ (24000, gen[1].squeeze().cpu().numpy()),
97
+ (24000, gen[2].squeeze().cpu().numpy()),
98
+ )
99
 
100
  # Create the Gradio interface
101
  interface = gr.Interface(
102
  fn=inference,
103
  inputs=[
104
+ gr.Textbox(lines=4, label="Text:"),
105
+ gr.Radio(["None/Custom", "Happy", "Sad", "Angry", "Disgusted", "Arrogant"],
106
+ value="None/Custom", label="Select emotion:"),
107
+ gr.Textbox(lines=1, label="Enter prompt if [Custom] emotion:"),
108
+ gr.Radio(["ultra_fast", "fast", "standard", "high_quality"],
109
+ value="fast", label="Preset mode:"),
110
+ gr.Dropdown(
111
+ options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
112
+ value="angie", # Default voice
113
+ label="Select voice:"
114
+ ),
115
+ gr.Audio(label="Record voice (when selected custom_voice):", type="filepath"),
116
+ gr.Dropdown(
117
+ options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
118
+ value="disabled",
119
+ label="(Optional) Select second voice:"
120
+ ),
121
+ gr.Dropdown(
122
+ options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
123
+ value="disabled",
124
+ label="(Optional) Select third voice:"
125
+ ),
126
+ gr.Number(value=0, precision=0, label="Seed (for reproducibility):"),
127
  ],
128
  outputs=[
129
+ gr.Audio(label="Sample of selected voice (first):"),
130
+ gr.Audio(label="Output [Candidate 1]:"),
131
+ gr.Audio(label="Output [Candidate 2]:"),
132
+ gr.Audio(label="Output [Candidate 3]:"),
133
  ],
134
  title="RJ VOICE CLONING",
135
  description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",