Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,17 +24,6 @@ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
|
|
| 24 |
from TTS.api import TTS
|
| 25 |
import pickle
|
| 26 |
|
| 27 |
-
# Try to install OpenVoice from GitHub if not found
|
| 28 |
-
try:
|
| 29 |
-
from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
|
| 30 |
-
from openvoice.se_extractor import get_se
|
| 31 |
-
except ImportError:
|
| 32 |
-
print("Installing OpenVoice from GitHub...")
|
| 33 |
-
import subprocess
|
| 34 |
-
subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
|
| 35 |
-
from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
|
| 36 |
-
from openvoice.se_extractor import get_se
|
| 37 |
-
|
| 38 |
# Suppress warnings
|
| 39 |
warnings.filterwarnings("ignore")
|
| 40 |
|
|
@@ -337,30 +326,9 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
| 337 |
mixed.export(out_path, format="wav")
|
| 338 |
return out_path
|
| 339 |
|
| 340 |
-
# === Voice Cloning
|
| 341 |
-
def clone_voice(
|
| 342 |
-
|
| 343 |
-
source_se, _ = get_se(source_audio)
|
| 344 |
-
target_se, _ = get_se(target_audio)
|
| 345 |
-
|
| 346 |
-
# Generate base TTS
|
| 347 |
-
out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
|
| 348 |
-
tts.tts_to_file(text=text, file_path=out_path)
|
| 349 |
-
|
| 350 |
-
# Apply voice conversion
|
| 351 |
-
tone_converter.convert(
|
| 352 |
-
audio_src_path=out_path,
|
| 353 |
-
src_se=source_se,
|
| 354 |
-
tgt_se=target_se,
|
| 355 |
-
output_path=out_path
|
| 356 |
-
)
|
| 357 |
-
|
| 358 |
-
return out_path
|
| 359 |
-
except Exception as e:
|
| 360 |
-
return f"⚠️ Cloning failed: {str(e)}"
|
| 361 |
-
|
| 362 |
-
tone_converter = ToneColorConverter().to("cuda" if torch.cuda.is_available() else "cpu")
|
| 363 |
-
openvoice_tts = OpenVoiceTTS(lang='en')
|
| 364 |
|
| 365 |
# === Speaker Diarization ("Who Spoke When?") ===
|
| 366 |
try:
|
|
@@ -376,19 +344,21 @@ except Exception as e:
|
|
| 376 |
print(f"⚠️ Failed to load diarization: {e}")
|
| 377 |
|
| 378 |
def diarize_and_transcribe(audio_path):
|
| 379 |
-
if diarize_pipeline
|
| 380 |
return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
|
| 381 |
|
|
|
|
| 382 |
audio = AudioSegment.from_file(audio_path)
|
| 383 |
temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
|
| 384 |
audio.export(temp_wav, format="wav")
|
| 385 |
|
| 386 |
try:
|
|
|
|
| 387 |
diarization = diarize_pipeline(temp_wav)
|
| 388 |
|
| 389 |
result = whisper.transcribe(temp_wav)
|
| 390 |
-
segments = []
|
| 391 |
|
|
|
|
| 392 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
| 393 |
text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
|
| 394 |
segments.append({
|
|
@@ -492,8 +462,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 492 |
description="Convert voice to text and edit it before exporting again."
|
| 493 |
)
|
| 494 |
|
| 495 |
-
# --- Voice Cloning (Dubbing) ===
|
| 496 |
-
with gr.Tab("🎭 Voice Cloning (
|
| 497 |
gr.Interface(
|
| 498 |
fn=clone_voice,
|
| 499 |
inputs=[
|
|
@@ -502,7 +472,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 502 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 503 |
],
|
| 504 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 505 |
-
title="Replace One Voice With Another",
|
| 506 |
description="Clone voice from source to target speaker using AI"
|
| 507 |
)
|
| 508 |
|
|
@@ -543,7 +513,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
| 543 |
return None, None, None, None
|
| 544 |
|
| 545 |
with gr.Tab("🧾 Auto-Save & Resume"):
|
| 546 |
-
gr.Markdown("Save your current state and resume later.")
|
| 547 |
|
| 548 |
action_radio = gr.Radio(["save", "load"], label="Action", value="save")
|
| 549 |
audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
|
|
|
|
| 24 |
from TTS.api import TTS
|
| 25 |
import pickle
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Suppress warnings
|
| 28 |
warnings.filterwarnings("ignore")
|
| 29 |
|
|
|
|
| 326 |
mixed.export(out_path, format="wav")
|
| 327 |
return out_path
|
| 328 |
|
| 329 |
+
# === Dummy Voice Cloning Tab – Works on Hugging Face ===
|
| 330 |
+
def clone_voice(*args):
|
| 331 |
+
return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
# === Speaker Diarization ("Who Spoke When?") ===
|
| 334 |
try:
|
|
|
|
| 344 |
print(f"⚠️ Failed to load diarization: {e}")
|
| 345 |
|
| 346 |
def diarize_and_transcribe(audio_path):
|
| 347 |
+
if not diarize_pipeline:
|
| 348 |
return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
|
| 349 |
|
| 350 |
+
# Run diarization
|
| 351 |
audio = AudioSegment.from_file(audio_path)
|
| 352 |
temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
|
| 353 |
audio.export(temp_wav, format="wav")
|
| 354 |
|
| 355 |
try:
|
| 356 |
+
from pyannote.audio import Pipeline as DiarizationPipeline
|
| 357 |
diarization = diarize_pipeline(temp_wav)
|
| 358 |
|
| 359 |
result = whisper.transcribe(temp_wav)
|
|
|
|
| 360 |
|
| 361 |
+
segments = []
|
| 362 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
| 363 |
text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
|
| 364 |
segments.append({
|
|
|
|
| 462 |
description="Convert voice to text and edit it before exporting again."
|
| 463 |
)
|
| 464 |
|
| 465 |
+
# --- Voice Cloning (Dubbing) – Dummy for Hugging Face ===
|
| 466 |
+
with gr.Tab("🎭 Voice Cloning (Local Only)"):
|
| 467 |
gr.Interface(
|
| 468 |
fn=clone_voice,
|
| 469 |
inputs=[
|
|
|
|
| 472 |
gr.Textbox(label="Text to Clone", lines=5)
|
| 473 |
],
|
| 474 |
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
| 475 |
+
title="Replace One Voice With Another (Local Only)",
|
| 476 |
description="Clone voice from source to target speaker using AI"
|
| 477 |
)
|
| 478 |
|
|
|
|
| 513 |
return None, None, None, None
|
| 514 |
|
| 515 |
with gr.Tab("🧾 Auto-Save & Resume"):
|
| 516 |
+
gr.Markdown("Save your current state and resume editing later.")
|
| 517 |
|
| 518 |
action_radio = gr.Radio(["save", "load"], label="Action", value="save")
|
| 519 |
audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
|