Spaces:

midhyaraj
/

vc

Runtime error

App Files Files Community

midhyaraj committed on Oct 28, 2024

Commit

8eab11b

verified ·

1 Parent(s): c3d1a4f

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -4

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ def setup_environment():
     subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
     # Install the package using setup.py
-    subprocess.run([sys.executable, "setup.py", "install"], check=True)  # Make sure this line is included
     # Install Gradio
     subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)
@@ -46,16 +46,90 @@ def main():
     ]
     def inference(text, emotion, prompt, voice, mic_audio, voice_b, voice_c, preset, seed):
-        # Your inference function implementation here...
     # Create the Gradio interface
     interface = gr.Interface(
         fn=inference,
         inputs=[
-            # Define your inputs here...
         ],
         outputs=[
-            # Define your outputs here...
         ],
         title="RJ VOICE CLONING",
         description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",

     subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
     # Install the package using setup.py
+    subprocess.run([sys.executable, "setup.py", "install"], check=True)
     # Install Gradio
     subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)
     ]
     def inference(text, emotion, prompt, voice, mic_audio, voice_b, voice_c, preset, seed):
+        if voice != "custom_voice":
+            voices = [voice]
+        else:
+            voices = []
+        if voice_b != "disabled":
+            voices.append(voice_b)
+        if voice_c != "disabled":
+            voices.append(voice_c)
+        if emotion != "None/Custom":
+            text = f"[I am really {emotion.lower()},] {text}"
+        elif prompt.strip() != "":
+            text = f"[{prompt},] {text}"
+        c = None
+        if voice == "custom_voice":
+            if mic_audio is None:
+                raise gr.Error("Please provide audio from mic when choosing custom voice")
+            c = torchaudio.load(mic_audio)[0]  # Use torchaudio to load audio
+        if len(voices) == 1 or len(voices) == 0:
+            if voice == "custom_voice":
+                voice_samples, conditioning_latents = [c], None
+            else:
+                voice_samples, conditioning_latents = tts.load_voice(voice)  # Ensure to call TTS method
+        else:
+            voice_samples, conditioning_latents = tts.load_voices(voices)
+            if voice == "custom_voice":
+                voice_samples.append(c)
+        sample_voice = voice_samples[0] if len(voice_samples) else None
+        start_time = time.time()
+        gen, _ = tts.tts_with_preset(
+            text,
+            voice_samples=voice_samples,
+            conditioning_latents=conditioning_latents,
+            preset=preset,
+            use_deterministic_seed=seed,
+            return_deterministic_state=True,
+            k=3,
+        )
+        return (
+            (22050, sample_voice.squeeze().cpu().numpy()),
+            (24000, gen[0].squeeze().cpu().numpy()),
+            (24000, gen[1].squeeze().cpu().numpy()),
+            (24000, gen[2].squeeze().cpu().numpy()),
+        )
     # Create the Gradio interface
     interface = gr.Interface(
         fn=inference,
         inputs=[
+            gr.Textbox(lines=4, label="Text:"),
+            gr.Radio(["None/Custom", "Happy", "Sad", "Angry", "Disgusted", "Arrogant"],
+                     value="None/Custom", label="Select emotion:"),
+            gr.Textbox(lines=1, label="Enter prompt if [Custom] emotion:"),
+            gr.Radio(["ultra_fast", "fast", "standard", "high_quality"],
+                     value="fast", label="Preset mode:"),
+            gr.Dropdown(
+                options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
+                value="angie",  # Default voice
+                label="Select voice:"
+            ),
+            gr.Audio(label="Record voice (when selected custom_voice):", type="filepath"),
+            gr.Dropdown(
+                options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
+                value="disabled",
+                label="(Optional) Select second voice:"
+            ),
+            gr.Dropdown(
+                options=os.listdir(os.path.join("tortoise", "voices")) + VOICE_OPTIONS,
+                value="disabled",
+                label="(Optional) Select third voice:"
+            ),
+            gr.Number(value=0, precision=0, label="Seed (for reproducibility):"),
         ],
         outputs=[
+            gr.Audio(label="Sample of selected voice (first):"),
+            gr.Audio(label="Output [Candidate 1]:"),
+            gr.Audio(label="Output [Candidate 2]:"),
+            gr.Audio(label="Output [Candidate 3]:"),
         ],
         title="RJ VOICE CLONING",
         description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",