Spaces:

tee342
/

AudioMaster

Sleeping

App Files Community

tee342 committed on Jun 12, 2025

Commit

6085d7e

verified ·

1 Parent(s): aea8d70

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -41

app.py CHANGED Viewed

@@ -24,17 +24,6 @@ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
 import pickle
-# Try to install OpenVoice from GitHub if not found
-try:
-    from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
-    from openvoice.se_extractor import get_se
-except ImportError:
-    print("Installing OpenVoice from GitHub...")
-    import subprocess
-    subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
-    from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
-    from openvoice.se_extractor import get_se
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -337,30 +326,9 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
-# === Voice Cloning / Dubbing Tab ===
-def clone_voice(source_audio, target_audio, text):
-    try:
-        source_se, _ = get_se(source_audio)
-        target_se, _ = get_se(target_audio)
-        # Generate base TTS
-        out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
-        tts.tts_to_file(text=text, file_path=out_path)
-        # Apply voice conversion
-        tone_converter.convert(
-            audio_src_path=out_path,
-            src_se=source_se,
-            tgt_se=target_se,
-            output_path=out_path
-        )
-        return out_path
-    except Exception as e:
-        return f"⚠️ Cloning failed: {str(e)}"
-tone_converter = ToneColorConverter().to("cuda" if torch.cuda.is_available() else "cpu")
-openvoice_tts = OpenVoiceTTS(lang='en')
 # === Speaker Diarization ("Who Spoke When?") ===
 try:
@@ -376,19 +344,21 @@ except Exception as e:
     print(f"⚠️ Failed to load diarization: {e}")
 def diarize_and_transcribe(audio_path):
-    if diarize_pipeline is None:
         return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
     audio = AudioSegment.from_file(audio_path)
     temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
     audio.export(temp_wav, format="wav")
     try:
         diarization = diarize_pipeline(temp_wav)
         result = whisper.transcribe(temp_wav)
-        segments = []
         for turn, _, speaker in diarization.itertracks(yield_label=True):
             text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
             segments.append({
@@ -492,8 +462,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Convert voice to text and edit it before exporting again."
         )
-    # --- Voice Cloning (Dubbing) ===
-    with gr.Tab("🎭 Voice Cloning (Dubbing)"):
         gr.Interface(
             fn=clone_voice,
             inputs=[
@@ -502,7 +472,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
                 gr.Textbox(label="Text to Clone", lines=5)
             ],
             outputs=gr.Audio(label="Cloned Output", type="filepath"),
-            title="Replace One Voice With Another",
             description="Clone voice from source to target speaker using AI"
         )
@@ -543,7 +513,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         return None, None, None, None
     with gr.Tab("🧾 Auto-Save & Resume"):
-        gr.Markdown("Save your current state and resume later.")
         action_radio = gr.Radio(["save", "load"], label="Action", value="save")
         audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")

 from TTS.api import TTS
 import pickle
 # Suppress warnings
 warnings.filterwarnings("ignore")
     mixed.export(out_path, format="wav")
     return out_path
+# === Dummy Voice Cloning Tab – Works on Hugging Face ===
+def clone_voice(*args):
+    return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 # === Speaker Diarization ("Who Spoke When?") ===
 try:
     print(f"⚠️ Failed to load diarization: {e}")
 def diarize_and_transcribe(audio_path):
+    if not diarize_pipeline:
         return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
+    # Run diarization
     audio = AudioSegment.from_file(audio_path)
     temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
     audio.export(temp_wav, format="wav")
     try:
+        from pyannote.audio import Pipeline as DiarizationPipeline
         diarization = diarize_pipeline(temp_wav)
         result = whisper.transcribe(temp_wav)
+        segments = []
         for turn, _, speaker in diarization.itertracks(yield_label=True):
             text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
             segments.append({
             description="Convert voice to text and edit it before exporting again."
         )
+    # --- Voice Cloning (Dubbing) – Dummy for Hugging Face ===
+    with gr.Tab("🎭 Voice Cloning (Local Only)"):
         gr.Interface(
             fn=clone_voice,
             inputs=[
                 gr.Textbox(label="Text to Clone", lines=5)
             ],
             outputs=gr.Audio(label="Cloned Output", type="filepath"),
+            title="Replace One Voice With Another (Local Only)",
             description="Clone voice from source to target speaker using AI"
         )
         return None, None, None, None
     with gr.Tab("🧾 Auto-Save & Resume"):
+        gr.Markdown("Save your current state and resume editing later.")
         action_radio = gr.Radio(["save", "load"], label="Action", value="save")
         audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")