Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import io, os,
|
| 3 |
from pydub import AudioSegment
|
| 4 |
from pydub.silence import split_on_silence
|
| 5 |
|
|
@@ -43,7 +43,6 @@ def _atempo_chain(factor: float) -> str:
|
|
| 43 |
return ",".join([f"atempo={s:.5f}" for s in steps])
|
| 44 |
|
| 45 |
def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
|
| 46 |
-
"""Pitch-preserving time stretch via FFmpeg atempo."""
|
| 47 |
with tempfile.TemporaryDirectory() as d:
|
| 48 |
inp = os.path.join(d, "in.wav")
|
| 49 |
outp = os.path.join(d, f"out.{fmt_out}")
|
|
@@ -64,27 +63,6 @@ def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
|
|
| 64 |
gain_db = float(target_lufs) - current_db
|
| 65 |
return seg.apply_gain(gain_db)
|
| 66 |
|
| 67 |
-
def sanitize_filename(name: str, default_stem="output"):
    """Return a filesystem-safe file stem derived from *name*.

    Only ASCII letters, digits, spaces, dashes, underscores and dots are
    kept; every other character (path separators, tabs, newlines, non-ASCII)
    is dropped. Each run of spaces then becomes a single underscore.

    Leading and trailing dots are removed so the result can never be ".",
    "..", a hidden dot-file, or end in a dot right before the caller appends
    its own ".ext".

    Args:
        name: User-supplied name. ``None`` or a blank string is accepted.
        default_stem: Value returned when nothing usable is left.

    Returns:
        The sanitized stem, or ``default_stem`` if it would be empty.
    """
    name = (name or "").strip()
    # Whitelist filter: anything outside the allowed set is removed outright.
    safe = re.sub(r"[^A-Za-z0-9 _.-]", "", name)
    # Only literal spaces survive the filter above, so this collapses them.
    safe = re.sub(r"\s+", "_", safe)
    # Dots are legal inside a stem but unsafe at either end.
    safe = safe.strip(".")
    return safe or default_stem
|
| 73 |
-
|
| 74 |
-
def write_temp_file_with_name(blob: io.BytesIO, stem: str, ext: str) -> str:
    """Write *blob* to a temporary file named ``<stem>.<ext>`` and return its path.

    The file is created inside a freshly made, private temporary directory.
    That guarantees the exact requested file name is available, and that two
    calls using the same stem never overwrite each other's output, which a
    fixed path directly inside the shared temp directory would allow.

    Args:
        blob: In-memory buffer whose full contents are written out.
        stem: Desired file name without extension; passed through
            ``sanitize_filename`` before use.
        ext: File extension, with or without a leading dot, any case.

    Returns:
        Absolute path of the written file. The caller owns the file and its
        parent directory; nothing is cleaned up here.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    stem = sanitize_filename(stem)
    ext = ext.lower().strip(".")
    # An empty extension must not leave a dangling trailing dot.
    filename = f"{stem}.{ext}" if ext else stem

    # A unique directory per call keeps identically named outputs apart.
    out_dir = tempfile.mkdtemp(prefix="voice_studio_")
    out_path = os.path.join(out_dir, filename)
    with open(out_path, "wb") as fh:
        fh.write(blob.getvalue())
    return out_path
|
| 87 |
-
|
| 88 |
# ---------- processors ----------
|
| 89 |
def process_single(file, mode, target_seconds, keep_silence_s,
|
| 90 |
min_silence_ms, silence_thresh_db, do_normalize, fmt):
|
|
@@ -131,45 +109,42 @@ def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
|
|
| 131 |
tf.flush(); tf.close()
|
| 132 |
return tf.name
|
| 133 |
|
| 134 |
-
# ---------- UI (
|
| 135 |
css = """
|
| 136 |
-
.gradio-container { max-width:
|
| 137 |
"""
|
| 138 |
|
| 139 |
with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
| 140 |
-
gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
-
|
|
|
|
| 144 |
files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
|
| 145 |
-
|
| 146 |
-
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
|
| 147 |
target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
|
| 148 |
keep = gr.Number(value=0.25, label="Set pause length (seconds)")
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
with gr.Accordion("Advanced (optional)", open=False):
|
| 154 |
-
min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
|
| 155 |
thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
|
| 156 |
do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
|
| 157 |
|
| 158 |
fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
|
| 159 |
go = gr.Button("Process", variant="primary")
|
| 160 |
|
| 161 |
-
|
|
|
|
| 162 |
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
|
| 163 |
direct = gr.File(label="Download processed file (single)")
|
| 164 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 165 |
rep = gr.Textbox(label="Report", lines=1)
|
| 166 |
|
| 167 |
-
def run(files, mode, target, keep, min_sil, thresh, do_norm,
|
| 168 |
files = files or []
|
| 169 |
if not files:
|
| 170 |
return None, None, None, "Please upload at least one audio file."
|
| 171 |
|
| 172 |
-
# process first file
|
| 173 |
single_blob, report = process_single(
|
| 174 |
open(files[0], "rb"),
|
| 175 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
@@ -179,25 +154,19 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
|
| 179 |
preview_path = write_temp_for_preview(single_blob, fmt)
|
| 180 |
|
| 181 |
if len(files) == 1:
|
| 182 |
-
|
| 183 |
-
stem = sanitize_filename(out_name, default_stem="output")
|
| 184 |
-
out_path = write_temp_file_with_name(single_blob, stem, fmt)
|
| 185 |
-
return preview_path, out_path, None, report
|
| 186 |
else:
|
| 187 |
-
# multi → zip (named after chosen stem)
|
| 188 |
opened = [open(p, "rb") for p in files]
|
| 189 |
zipped = process_batch(
|
| 190 |
opened, mode=mode, target_seconds=target, keep_silence_s=keep,
|
| 191 |
min_silence_ms=min_sil, silence_thresh_db=thresh,
|
| 192 |
do_normalize=do_norm, fmt=fmt
|
| 193 |
)
|
| 194 |
-
|
| 195 |
-
zip_path = write_temp_file_with_name(zipped, f"{zip_stem}_batch", "zip")
|
| 196 |
-
return preview_path, None, zip_path, report
|
| 197 |
|
| 198 |
go.click(
|
| 199 |
run,
|
| 200 |
-
[files, mode, target, keep, min_sil, thresh, do_norm,
|
| 201 |
[preview, direct, zip_out, rep]
|
| 202 |
)
|
| 203 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import io, os, uuid, zipfile, tempfile, subprocess
|
| 3 |
from pydub import AudioSegment
|
| 4 |
from pydub.silence import split_on_silence
|
| 5 |
|
|
|
|
| 43 |
return ",".join([f"atempo={s:.5f}" for s in steps])
|
| 44 |
|
| 45 |
def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
|
|
|
|
| 46 |
with tempfile.TemporaryDirectory() as d:
|
| 47 |
inp = os.path.join(d, "in.wav")
|
| 48 |
outp = os.path.join(d, f"out.{fmt_out}")
|
|
|
|
| 63 |
gain_db = float(target_lufs) - current_db
|
| 64 |
return seg.apply_gain(gain_db)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# ---------- processors ----------
|
| 67 |
def process_single(file, mode, target_seconds, keep_silence_s,
|
| 68 |
min_silence_ms, silence_thresh_db, do_normalize, fmt):
|
|
|
|
| 109 |
tf.flush(); tf.close()
|
| 110 |
return tf.name
|
| 111 |
|
| 112 |
+
# ---------- UI (two-column, compact) ----------
|
| 113 |
css = """
|
| 114 |
+
.gradio-container { max-width: 1100px !important; margin: auto !important; }
|
| 115 |
"""
|
| 116 |
|
| 117 |
with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
| 118 |
+
gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
|
| 119 |
|
| 120 |
with gr.Row():
|
| 121 |
+
# Left column: controls
|
| 122 |
+
with gr.Column(scale=1):
|
| 123 |
files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
|
| 124 |
+
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode", elem_id="mode")
|
|
|
|
| 125 |
target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
|
| 126 |
keep = gr.Number(value=0.25, label="Set pause length (seconds)")
|
| 127 |
|
| 128 |
+
with gr.Accordion("Advanced options", open=False):
|
| 129 |
+
min_sil = gr.Slider(50, 1000, 120, step=10, label="Pause if silence ≥ (ms)")
|
|
|
|
|
|
|
|
|
|
| 130 |
thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
|
| 131 |
do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
|
| 132 |
|
| 133 |
fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
|
| 134 |
go = gr.Button("Process", variant="primary")
|
| 135 |
|
| 136 |
+
# Right column: outputs
|
| 137 |
+
with gr.Column(scale=1):
|
| 138 |
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
|
| 139 |
direct = gr.File(label="Download processed file (single)")
|
| 140 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 141 |
rep = gr.Textbox(label="Report", lines=1)
|
| 142 |
|
| 143 |
+
def run(files, mode, target, keep, min_sil, thresh, do_norm, fmt):
|
| 144 |
files = files or []
|
| 145 |
if not files:
|
| 146 |
return None, None, None, "Please upload at least one audio file."
|
| 147 |
|
|
|
|
| 148 |
single_blob, report = process_single(
|
| 149 |
open(files[0], "rb"),
|
| 150 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
|
|
| 154 |
preview_path = write_temp_for_preview(single_blob, fmt)
|
| 155 |
|
| 156 |
if len(files) == 1:
|
| 157 |
+
return preview_path, single_blob, None, report
|
|
|
|
|
|
|
|
|
|
| 158 |
else:
|
|
|
|
| 159 |
opened = [open(p, "rb") for p in files]
|
| 160 |
zipped = process_batch(
|
| 161 |
opened, mode=mode, target_seconds=target, keep_silence_s=keep,
|
| 162 |
min_silence_ms=min_sil, silence_thresh_db=thresh,
|
| 163 |
do_normalize=do_norm, fmt=fmt
|
| 164 |
)
|
| 165 |
+
return preview_path, None, zipped, report
|
|
|
|
|
|
|
| 166 |
|
| 167 |
go.click(
|
| 168 |
run,
|
| 169 |
+
[files, mode, target, keep, min_sil, thresh, do_norm, fmt],
|
| 170 |
[preview, direct, zip_out, rep]
|
| 171 |
)
|
| 172 |
|