Spaces:

LEMAS-Project
/

LEMAS-TTS

Running on Zero

App Files Files Community

Approximetal committed on Jan 4

Commit

3e1b384

verified ·

1 Parent(s): 9f66cd3

Update inference_gradio.py

Browse files

Files changed (1) hide show

inference_gradio.py +35 -9

inference_gradio.py CHANGED Viewed

@@ -110,7 +110,6 @@ class UVR5:
 denoise_model = UVR5(
     model_dir=Path(PRETRAINED_ROOT) / "uvr5",
-    code_dir=REPO_ROOT / "uvr5",
 )
 def load_wav(audio_info, sr=16000, channel=1):
@@ -130,11 +129,9 @@ def load_wav(audio_info, sr=16000, channel=1):
 def denoise(audio_info):
-    save_path = "./denoised_audio.wav"
     denoised_audio, sr = denoise_model.denoise(audio_info)
-    sf.write(save_path, denoised_audio, sr, format='wav', subtype='PCM_24')
-    print("save denoised audio:", save_path)
-    return save_path
 def cancel_denoise(audio_info):
     return audio_info
@@ -240,8 +237,22 @@ def infer(
     if not os.path.isfile(ckpt_resolved):
         return None, "Checkpoint not found!", ""
-    if denoise_audio:
-        ref_audio = denoise_audio
     # Automatically enable prosody encoder when using the prosody checkpoint
     use_prosody_encoder = True if "prosody" in str(ckpt_resolved) else False
@@ -274,6 +285,9 @@ def infer(
         )
     except Exception as e:
         traceback.print_exc()
         return None, f"Error loading model: {str(e)}", ""
     print("Model loaded >>", file_checkpoint, use_ema)
@@ -284,7 +298,7 @@ def infer(
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
             tts_api.infer(
-                ref_file=ref_audio,
                 ref_text=ref_text.strip(),
                 gen_text=gen_text.strip(),
                 nfe_step=nfe_step,
@@ -303,6 +317,10 @@ def infer(
     except Exception as e:
         traceback.print_exc()
         return None, f"Inference error: {str(e)}", ""
 def get_gpu_stats():
@@ -457,7 +475,15 @@ with gr.Blocks(title="LEMAS-TTS Inference") as app:
         with gr.Row():
             denoise_btn = gr.Button(value="Denoise")
             cancel_btn = gr.Button(value="Cancel Denoise")
-        denoise_audio = gr.Audio(label="Denoised Audio", value=None, type="filepath", interactive=True, show_download_button=True, editable=True)
     gen_text = gr.Textbox(label="Text to Generate", placeholder="Enter the text you want to generate...")

 denoise_model = UVR5(
     model_dir=Path(PRETRAINED_ROOT) / "uvr5",
 )
 def load_wav(audio_info, sr=16000, channel=1):
 def denoise(audio_info):
+    # Return a numpy waveform tuple for direct playback in Gradio.
     denoised_audio, sr = denoise_model.denoise(audio_info)
+    return (sr, denoised_audio)
 def cancel_denoise(audio_info):
     return audio_info
     if not os.path.isfile(ckpt_resolved):
         return None, "Checkpoint not found!", ""
+    # Prepare reference audio:
+    # - `ref_audio` from Gradio is a filepath (original reference)
+    # - `denoise_audio` is an optional (sr, numpy_array) tuple from UVR5.
+    #   If provided, dump it to a temporary wav file and use that as ref_file.
+    ref_audio_path = ref_audio
+    tmp_ref_path = None
+    if denoise_audio is not None:
+        try:
+            sr_d, wav_d = denoise_audio
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f_ref:
+                sf.write(f_ref.name, wav_d, int(sr_d), format="wav", subtype="PCM_24")
+                tmp_ref_path = f_ref.name
+                ref_audio_path = f_ref.name
+        except Exception as e:
+            traceback.print_exc()
+            return None, f"Error preparing denoised reference audio: {str(e)}", ""
     # Automatically enable prosody encoder when using the prosody checkpoint
     use_prosody_encoder = True if "prosody" in str(ckpt_resolved) else False
         )
     except Exception as e:
         traceback.print_exc()
+        # Cleanup temp ref file if it was created
+        if tmp_ref_path is not None and os.path.isfile(tmp_ref_path):
+            os.remove(tmp_ref_path)
         return None, f"Error loading model: {str(e)}", ""
     print("Model loaded >>", file_checkpoint, use_ema)
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
             tts_api.infer(
+                ref_file=ref_audio_path,
                 ref_text=ref_text.strip(),
                 gen_text=gen_text.strip(),
                 nfe_step=nfe_step,
     except Exception as e:
         traceback.print_exc()
         return None, f"Inference error: {str(e)}", ""
+    finally:
+        # Remove temporary reference file if created
+        if tmp_ref_path is not None and os.path.isfile(tmp_ref_path):
+            os.remove(tmp_ref_path)
 def get_gpu_stats():
         with gr.Row():
             denoise_btn = gr.Button(value="Denoise")
             cancel_btn = gr.Button(value="Cancel Denoise")
+        # Use numpy type here so we can reuse the waveform directly in Python.
+        denoise_audio = gr.Audio(
+            label="Denoised Audio",
+            value=None,
+            type="numpy",
+            interactive=True,
+            show_download_button=True,
+            editable=True,
+        )
     gen_text = gr.Textbox(label="Text to Generate", placeholder="Enter the text you want to generate...")