Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import io, os, uuid, zipfile, tempfile, subprocess
|
| 3 |
from pydub import AudioSegment
|
| 4 |
from pydub.silence import split_on_silence
|
| 5 |
|
|
@@ -17,7 +17,7 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
|
|
| 17 |
buf.seek(0)
|
| 18 |
return buf
|
| 19 |
|
| 20 |
-
def remove_silence(seg: AudioSegment, keep_ms=
|
| 21 |
chunks = split_on_silence(
|
| 22 |
seg,
|
| 23 |
min_silence_len=int(min_silence_ms),
|
|
@@ -33,7 +33,6 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
|
|
| 33 |
return seg + AudioSegment.silent(duration=t_ms - len(seg))
|
| 34 |
|
| 35 |
def _atempo_chain(factor: float) -> str:
|
| 36 |
-
# Build a chain so each step stays within [0.5, 2.0] for better quality.
|
| 37 |
steps = []
|
| 38 |
f = max(0.1, min(10.0, float(factor)))
|
| 39 |
while f < 0.5:
|
|
@@ -59,20 +58,39 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
|
|
| 59 |
return io.BytesIO(f.read())
|
| 60 |
|
| 61 |
def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
    """Apply a gain so the segment's RMS level lands on ``target_lufs``.

    Lightweight perceived normalization using RMS (keeps deps minimal).
    This is an RMS-based approximation, not a true LUFS measurement.

    Args:
        seg: Segment to normalize; must expose ``rms``,
            ``max_possible_amplitude`` and ``apply_gain``.
        target_lufs: Desired level in dB relative to full scale.

    Returns:
        Whatever ``seg.apply_gain`` returns for the computed gain.
    """
    import math

    # A silent segment reports rms == 0; clamp to 1 so log10 stays defined.
    rms = seg.rms or 1
    # Measure against the segment's own full scale instead of assuming
    # 16-bit samples, so 8/24/32-bit audio gets the correct gain.
    full_scale = seg.max_possible_amplitude
    current_db = 20 * math.log10(rms / full_scale)
    gain_db = float(target_lufs) - current_db
    return seg.apply_gain(gain_db)
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
# ---------- processors ----------
|
| 70 |
def process_single(file, mode, target_seconds, keep_silence_s,
|
| 71 |
min_silence_ms, silence_thresh_db, do_normalize, fmt):
|
| 72 |
raw = file if isinstance(file, (bytes, bytearray)) else file.read()
|
| 73 |
original = _load(raw)
|
| 74 |
|
| 75 |
-
# 1) optional silence removal / pause control
|
| 76 |
cleaned = remove_silence(
|
| 77 |
original,
|
| 78 |
keep_ms=int(float(keep_silence_s) * 1000),
|
|
@@ -80,11 +98,9 @@ def process_single(file, mode, target_seconds, keep_silence_s,
|
|
| 80 |
thresh_db=float(silence_thresh_db),
|
| 81 |
)
|
| 82 |
|
| 83 |
-
# 2) optional loudness normalize
|
| 84 |
if do_normalize:
|
| 85 |
cleaned = normalize_lufs(cleaned, -14.0)
|
| 86 |
|
| 87 |
-
# 3) timing mode
|
| 88 |
if mode == "trim" and target_seconds:
|
| 89 |
final = trim_to_seconds(cleaned, target_seconds)
|
| 90 |
out = _export(final, fmt)
|
|
@@ -110,30 +126,37 @@ def process_batch(files, **kwargs) -> io.BytesIO:
|
|
| 110 |
return zbuf
|
| 111 |
|
| 112 |
def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
    """Write ``blob`` to a persistent temp file and return its path.

    Gradio Audio preview works great with a file path, so the bytes are
    written to a temp file rather than handed over in memory.

    Args:
        blob: In-memory audio data; its full contents are written
            regardless of the current stream position.
        fmt: File extension (without the dot) used as the temp-file suffix.

    Returns:
        Path of the created file. The file is not deleted automatically.
    """
    # The context manager closes the handle even if the write fails;
    # delete=False keeps the file on disk after it is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as tf:
        tf.write(blob.getvalue())
    return tf.name
|
| 118 |
|
| 119 |
-
# ---------- UI ----------
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
with gr.Row():
|
| 127 |
with gr.Column():
|
| 128 |
-
files = gr.Files(label="Upload audio
|
|
|
|
| 129 |
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
|
| 130 |
-
target = gr.Number(value=30, label="Target seconds (for trim/fit)")
|
| 131 |
-
keep = gr.Number(value=0.25, label="Set pause length (seconds
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
|
| 136 |
-
go = gr.Button("Process")
|
| 137 |
|
| 138 |
with gr.Column():
|
| 139 |
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
|
|
@@ -141,12 +164,12 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
|
|
| 141 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 142 |
rep = gr.Textbox(label="Report", lines=1)
|
| 143 |
|
| 144 |
-
def run(files, mode, target, keep, min_sil, thresh, do_norm, fmt):
|
| 145 |
files = files or []
|
| 146 |
if not files:
|
| 147 |
return None, None, None, "Please upload at least one audio file."
|
| 148 |
|
| 149 |
-
#
|
| 150 |
single_blob, report = process_single(
|
| 151 |
open(files[0], "rb"),
|
| 152 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
@@ -156,22 +179,25 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
|
|
| 156 |
preview_path = write_temp_for_preview(single_blob, fmt)
|
| 157 |
|
| 158 |
if len(files) == 1:
|
| 159 |
-
#
|
| 160 |
-
|
| 161 |
-
|
|
|
|
| 162 |
else:
|
| 163 |
-
#
|
| 164 |
opened = [open(p, "rb") for p in files]
|
| 165 |
zipped = process_batch(
|
| 166 |
opened, mode=mode, target_seconds=target, keep_silence_s=keep,
|
| 167 |
min_silence_ms=min_sil, silence_thresh_db=thresh,
|
| 168 |
do_normalize=do_norm, fmt=fmt
|
| 169 |
)
|
| 170 |
-
|
|
|
|
|
|
|
| 171 |
|
| 172 |
go.click(
|
| 173 |
run,
|
| 174 |
-
[files, mode, target, keep, min_sil, thresh, do_norm, fmt],
|
| 175 |
[preview, direct, zip_out, rep]
|
| 176 |
)
|
| 177 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import io, os, re, uuid, zipfile, tempfile, subprocess
|
| 3 |
from pydub import AudioSegment
|
| 4 |
from pydub.silence import split_on_silence
|
| 5 |
|
|
|
|
| 17 |
buf.seek(0)
|
| 18 |
return buf
|
| 19 |
|
| 20 |
+
def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
|
| 21 |
chunks = split_on_silence(
|
| 22 |
seg,
|
| 23 |
min_silence_len=int(min_silence_ms),
|
|
|
|
| 33 |
return seg + AudioSegment.silent(duration=t_ms - len(seg))
|
| 34 |
|
| 35 |
def _atempo_chain(factor: float) -> str:
|
|
|
|
| 36 |
steps = []
|
| 37 |
f = max(0.1, min(10.0, float(factor)))
|
| 38 |
while f < 0.5:
|
|
|
|
| 58 |
return io.BytesIO(f.read())
|
| 59 |
|
| 60 |
def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
    """Apply a gain so the segment's RMS level lands on ``target_lufs``.

    This is an RMS-based approximation of loudness normalization, not a
    true LUFS measurement.

    Args:
        seg: Segment to normalize; must expose ``rms``,
            ``max_possible_amplitude`` and ``apply_gain``.
        target_lufs: Desired level in dB relative to full scale.

    Returns:
        Whatever ``seg.apply_gain`` returns for the computed gain.
    """
    import math

    # A silent segment reports rms == 0; clamp to 1 so log10 stays defined.
    rms = seg.rms or 1
    # Measure against the segment's own full scale instead of assuming
    # 16-bit samples, so 8/24/32-bit audio gets the correct gain.
    full_scale = seg.max_possible_amplitude
    current_db = 20 * math.log10(rms / full_scale)
    gain_db = float(target_lufs) - current_db
    return seg.apply_gain(gain_db)
|
| 66 |
|
| 67 |
+
def sanitize_filename(name: str, default_stem="output"):
    """Reduce a user-supplied name to a safe file-name stem.

    Keeps ASCII letters, digits, spaces, dots, dashes and underscores and
    drops everything else (including path separators). Spaces and dots at
    either end are removed so the result is never a hidden or dot-only
    name, then remaining runs of spaces collapse to a single underscore.

    Args:
        name: Raw name typed by the user; ``None`` is treated as empty.
        default_stem: Returned when nothing usable is left.

    Returns:
        The sanitized stem, or ``default_stem`` if the result is empty.
    """
    cleaned = (name or "").strip()
    cleaned = re.sub(r"[^A-Za-z0-9 _.-]", "", cleaned)
    # Removing characters can expose new edge spaces/dots ("../x", "hi !"),
    # so trim after filtering rather than before.
    cleaned = cleaned.strip(" .")
    cleaned = re.sub(r"\s+", "_", cleaned)
    return cleaned or default_stem
|
| 73 |
+
|
| 74 |
+
def write_temp_file_with_name(blob: io.BytesIO, stem: str, ext: str) -> str:
    """Write ``blob`` to a temp file named exactly ``<stem>.<ext>``.

    The file is placed in a freshly created, uniquely named temporary
    directory. That keeps the requested file name intact while ensuring
    two calls with the same stem never overwrite each other's output.

    Args:
        blob: In-memory data; its full contents are written regardless of
            the current stream position.
        stem: Desired file name without extension; passed through
            ``sanitize_filename`` before use.
        ext: File extension, with or without dots at either end;
            lower-cased.

    Returns:
        Path of the written file. Neither the file nor its directory is
        removed automatically.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    stem = sanitize_filename(stem)
    ext = ext.lower().strip(".")
    # One private directory per call: the exact name is preserved without
    # renaming into a shared location where same-named outputs collide.
    out_dir = tempfile.mkdtemp(prefix="voice_studio_")
    out_path = os.path.join(out_dir, f"{stem}.{ext}")
    with open(out_path, "wb") as fh:
        fh.write(blob.getvalue())
    return out_path
|
| 87 |
+
|
| 88 |
# ---------- processors ----------
|
| 89 |
def process_single(file, mode, target_seconds, keep_silence_s,
|
| 90 |
min_silence_ms, silence_thresh_db, do_normalize, fmt):
|
| 91 |
raw = file if isinstance(file, (bytes, bytearray)) else file.read()
|
| 92 |
original = _load(raw)
|
| 93 |
|
|
|
|
| 94 |
cleaned = remove_silence(
|
| 95 |
original,
|
| 96 |
keep_ms=int(float(keep_silence_s) * 1000),
|
|
|
|
| 98 |
thresh_db=float(silence_thresh_db),
|
| 99 |
)
|
| 100 |
|
|
|
|
| 101 |
if do_normalize:
|
| 102 |
cleaned = normalize_lufs(cleaned, -14.0)
|
| 103 |
|
|
|
|
| 104 |
if mode == "trim" and target_seconds:
|
| 105 |
final = trim_to_seconds(cleaned, target_seconds)
|
| 106 |
out = _export(final, fmt)
|
|
|
|
| 126 |
return zbuf
|
| 127 |
|
| 128 |
def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
    """Write ``blob`` to a persistent temp file and return its path.

    Args:
        blob: In-memory audio data; its full contents are written
            regardless of the current stream position.
        fmt: File extension (without the dot) used as the temp-file suffix.

    Returns:
        Path of the created file. The file is not deleted automatically.
    """
    # The context manager closes the handle even if the write fails;
    # delete=False keeps the file on disk after it is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as tf:
        tf.write(blob.getvalue())
    return tf.name
|
| 133 |
|
| 134 |
+
# ---------- UI (minimal + custom filename) ----------
|
| 135 |
+
css = """
|
| 136 |
+
.gradio-container { max-width: 880px !important; margin: auto !important; }
|
| 137 |
+
"""
|
| 138 |
+
|
| 139 |
+
with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
| 140 |
+
gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.\n\n**Tip:** Set a custom file name below for your download.")
|
| 141 |
|
| 142 |
with gr.Row():
|
| 143 |
with gr.Column():
|
| 144 |
+
files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
|
| 145 |
+
|
| 146 |
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
|
| 147 |
+
target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
|
| 148 |
+
keep = gr.Number(value=0.25, label="Set pause length (seconds)")
|
| 149 |
+
|
| 150 |
+
# NEW: custom filename stem (no extension)
|
| 151 |
+
out_name = gr.Textbox(value="voiceover", label="Output filename (no extension)")
|
| 152 |
+
|
| 153 |
+
with gr.Accordion("Advanced (optional)", open=False):
|
| 154 |
+
min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
|
| 155 |
+
thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
|
| 156 |
+
do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
|
| 157 |
+
|
| 158 |
fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
|
| 159 |
+
go = gr.Button("Process", variant="primary")
|
| 160 |
|
| 161 |
with gr.Column():
|
| 162 |
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
|
|
|
|
| 164 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 165 |
rep = gr.Textbox(label="Report", lines=1)
|
| 166 |
|
| 167 |
+
def run(files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt):
|
| 168 |
files = files or []
|
| 169 |
if not files:
|
| 170 |
return None, None, None, "Please upload at least one audio file."
|
| 171 |
|
| 172 |
+
# process first file
|
| 173 |
single_blob, report = process_single(
|
| 174 |
open(files[0], "rb"),
|
| 175 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
|
|
| 179 |
preview_path = write_temp_for_preview(single_blob, fmt)
|
| 180 |
|
| 181 |
if len(files) == 1:
|
| 182 |
+
# return a file path with the requested name + extension
|
| 183 |
+
stem = sanitize_filename(out_name, default_stem="output")
|
| 184 |
+
out_path = write_temp_file_with_name(single_blob, stem, fmt)
|
| 185 |
+
return preview_path, out_path, None, report
|
| 186 |
else:
|
| 187 |
+
# multi → zip (named after chosen stem)
|
| 188 |
opened = [open(p, "rb") for p in files]
|
| 189 |
zipped = process_batch(
|
| 190 |
opened, mode=mode, target_seconds=target, keep_silence_s=keep,
|
| 191 |
min_silence_ms=min_sil, silence_thresh_db=thresh,
|
| 192 |
do_normalize=do_norm, fmt=fmt
|
| 193 |
)
|
| 194 |
+
zip_stem = sanitize_filename(out_name, default_stem="batch_output")
|
| 195 |
+
zip_path = write_temp_file_with_name(zipped, f"{zip_stem}_batch", "zip")
|
| 196 |
+
return preview_path, None, zip_path, report
|
| 197 |
|
| 198 |
go.click(
|
| 199 |
run,
|
| 200 |
+
[files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt],
|
| 201 |
[preview, direct, zip_out, rep]
|
| 202 |
)
|
| 203 |
|