Spaces:

don0726
/

pp

Sleeping

don0726 committed on Apr 15

Commit

1149d1a

verified ·

1 Parent(s): 9fd1484

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import os
 import uuid
 import torch
 import gradio as gr
 from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
 from fastapi.responses import FileResponse
 from TTS.api import TTS
 import uvicorn
-from pydub import AudioSegment
 os.environ["COQUI_TOS_AGREED"] = "1"
@@ -33,12 +33,21 @@ OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # =========================
-# 🔊 AUDIO CONVERT
 # =========================
 def convert_to_wav(input_path, output_path):
-    audio = AudioSegment.from_file(input_path)
-    audio = audio.set_channels(1).set_frame_rate(16000)
-    audio.export(output_path, format="wav")
 # =========================
 # 🧹 CLEAN
@@ -79,15 +88,15 @@ async def clone_voice_api(
         busy = True
-        raw_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_raw"
         input_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_clean.wav"
         output_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_out.wav"
-        # Save
         with open(raw_path, "wb") as f:
             f.write(await audio.read())
-        # Convert
         convert_to_wav(raw_path, input_path)
         cleanup_files(raw_path)
@@ -139,7 +148,7 @@ async def clone_voice_ui(audio_path, text, language):
     return "✅ Done", output_path
 with gr.Blocks() as demo:
-    gr.Markdown("# ⚡ Ultra Fast XTTS")
     a = gr.Audio(type="filepath")
     t = gr.Textbox()

 import os
 import uuid
 import torch
+import torchaudio
 import gradio as gr
 from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
 from fastapi.responses import FileResponse
 from TTS.api import TTS
 import uvicorn
 os.environ["COQUI_TOS_AGREED"] = "1"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # =========================
+# 🔊 AUDIO CONVERT (NO FFMPEG)
 # =========================
 def convert_to_wav(input_path, output_path):
+    waveform, sr = torchaudio.load(input_path)
+    # Convert to mono
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+    # Resample to 16kHz
+    if sr != 16000:
+        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
+        waveform = resampler(waveform)
+    torchaudio.save(output_path, waveform, 16000)
 # =========================
 # 🧹 CLEAN
         busy = True
+        raw_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_raw.wav"
         input_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_clean.wav"
         output_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_out.wav"
+        # Save file
         with open(raw_path, "wb") as f:
             f.write(await audio.read())
+        # 🔥 Convert (fast)
         convert_to_wav(raw_path, input_path)
         cleanup_files(raw_path)
     return "✅ Done", output_path
 with gr.Blocks() as demo:
+    gr.Markdown("# ⚡ XTTS (No FFmpeg - Ultra Fast)")
     a = gr.Audio(type="filepath")
     t = gr.Textbox()