Spaces:

lifesee
/

VoiceoverStudio

Running

App Files Files Community

lifesee committed on Aug 24, 2025

Commit

ab32289

verified ·

1 Parent(s): 35d8a2f

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -29

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-import io, os, uuid, zipfile, tempfile, subprocess
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
@@ -17,7 +17,7 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
     buf.seek(0)
     return buf
-def remove_silence(seg: AudioSegment, keep_ms=50, min_silence_ms=100, thresh_db=-45):
     chunks = split_on_silence(
         seg,
         min_silence_len=int(min_silence_ms),
@@ -33,7 +33,6 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
     return seg + AudioSegment.silent(duration=t_ms - len(seg))
 def _atempo_chain(factor: float) -> str:
-    # Build a chain so each step stays within [0.5, 2.0] for better quality.
     steps = []
     f = max(0.1, min(10.0, float(factor)))
     while f < 0.5:
@@ -59,20 +58,39 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
             return io.BytesIO(f.read())
 def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
-    # Lightweight perceived normalization using RMS (keeps deps minimal).
     import math
     rms = seg.rms or 1
     current_db = 20 * math.log10(rms / (1 << 15))
     gain_db = float(target_lufs) - current_db
     return seg.apply_gain(gain_db)
 # ---------- processors ----------
 def process_single(file, mode, target_seconds, keep_silence_s,
                    min_silence_ms, silence_thresh_db, do_normalize, fmt):
     raw = file if isinstance(file, (bytes, bytearray)) else file.read()
     original = _load(raw)
-    # 1) optional silence removal / pause control
     cleaned = remove_silence(
         original,
         keep_ms=int(float(keep_silence_s) * 1000),
@@ -80,11 +98,9 @@ def process_single(file, mode, target_seconds, keep_silence_s,
         thresh_db=float(silence_thresh_db),
     )
-    # 2) optional loudness normalize
     if do_normalize:
         cleaned = normalize_lufs(cleaned, -14.0)
-    # 3) timing mode
     if mode == "trim" and target_seconds:
         final = trim_to_seconds(cleaned, target_seconds)
         out = _export(final, fmt)
@@ -110,30 +126,37 @@ def process_batch(files, **kwargs) -> io.BytesIO:
     return zbuf
 def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
-    # Gradio Audio preview works great with a file path; write a temp file.
     tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
     tf.write(blob.getvalue())
     tf.flush(); tf.close()
     return tf.name
-# ---------- UI ----------
-with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
-    gr.Markdown(
-        "### Remove or normalize pauses, **set pause length**, **trim to exact time**, or **fit length (pitch preserved)**.\n"
-        "_Outputs: mp3 / wav / m4a / ogg. Single file → direct download. Multiple files → ZIP._"
-    )
     with gr.Row():
         with gr.Column():
-            files = gr.Files(label="Upload audio (one or many)", file_types=["audio"], type="filepath")
             mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
-            target = gr.Number(value=30, label="Target seconds (for trim/fit)")
-            keep = gr.Number(value=0.25, label="Set pause length (seconds kept at cuts)")
-            min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
-            thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
-            do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
             fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
-            go = gr.Button("Process")
         with gr.Column():
             preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
@@ -141,12 +164,12 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
             zip_out = gr.File(label="Download ZIP (if multiple)")
             rep = gr.Textbox(label="Report", lines=1)
-    def run(files, mode, target, keep, min_sil, thresh, do_norm, fmt):
         files = files or []
         if not files:
             return None, None, None, "Please upload at least one audio file."
-        # Process first file for preview & (if single) for direct download
         single_blob, report = process_single(
             open(files[0], "rb"),
             mode=mode, target_seconds=target, keep_silence_s=keep,
@@ -156,22 +179,25 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
         preview_path = write_temp_for_preview(single_blob, fmt)
         if len(files) == 1:
-            # Direct download for single file
-            direct_file = single_blob
-            return preview_path, direct_file, None, report
         else:
-            # ZIP for multiple files
             opened = [open(p, "rb") for p in files]
             zipped = process_batch(
                 opened, mode=mode, target_seconds=target, keep_silence_s=keep,
                 min_silence_ms=min_sil, silence_thresh_db=thresh,
                 do_normalize=do_norm, fmt=fmt
             )
-            return preview_path, None, zipped, report
     go.click(
         run,
-        [files, mode, target, keep, min_sil, thresh, do_norm, fmt],
         [preview, direct, zip_out, rep]
     )

 import gradio as gr
+import io, os, re, uuid, zipfile, tempfile, subprocess
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
     buf.seek(0)
     return buf
+def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
     chunks = split_on_silence(
         seg,
         min_silence_len=int(min_silence_ms),
     return seg + AudioSegment.silent(duration=t_ms - len(seg))
 def _atempo_chain(factor: float) -> str:
     steps = []
     f = max(0.1, min(10.0, float(factor)))
     while f < 0.5:
             return io.BytesIO(f.read())
 def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
     """Apply a gain so the segment's RMS level lands on ``target_lufs``.

     Despite the name, this is an RMS-based approximation and not a true
     ITU-R BS.1770 loudness measurement: the current level is the RMS
     amplitude expressed in dB relative to full scale, and the applied gain
     is the difference between ``target_lufs`` and that level.

     Args:
         seg: Segment to level; must expose ``rms``,
             ``max_possible_amplitude`` and ``apply_gain``.
         target_lufs: Desired level in dB relative to full scale.

     Returns:
         Whatever ``seg.apply_gain`` returns for the computed gain.
     """
     import math

     # An all-zero segment reports rms == 0; substitute 1 so log10 stays finite.
     rms = seg.rms or 1
     # Measure against the segment's own full-scale value instead of assuming
     # 16-bit samples, so 8/24/32-bit audio gets the correct reference level.
     full_scale = seg.max_possible_amplitude
     current_db = 20 * math.log10(rms / full_scale)
     gain_db = float(target_lufs) - current_db
     return seg.apply_gain(gain_db)
+def sanitize_filename(name: str, default_stem="output"):
+    name = (name or "").strip()
+    # allow letters, digits, spaces, dashes, underscores; collapse spaces to underscores
+    safe = re.sub(r"[^A-Za-z0-9 _.-]", "", name)
+    safe = re.sub(r"\s+", "_", safe)
+    return safe or default_stem
+def write_temp_file_with_name(blob: io.BytesIO, stem: str, ext: str) -> str:
+    stem = sanitize_filename(stem)
+    ext = ext.lower().strip(".")
+    tf = tempfile.NamedTemporaryFile(delete=False, prefix=f"{stem}_", suffix=f".{ext}")
+    tf.write(blob.getvalue())
+    tf.flush(); tf.close()
+    # Rename to exact requested stem if possible (NamedTemporaryFile adds random chars in prefix)
+    exact_path = os.path.join(os.path.dirname(tf.name), f"{stem}.{ext}")
+    try:
+        os.replace(tf.name, exact_path)
+        return exact_path
+    except Exception:
+        return tf.name  # fallback
 # ---------- processors ----------
 def process_single(file, mode, target_seconds, keep_silence_s,
                    min_silence_ms, silence_thresh_db, do_normalize, fmt):
     raw = file if isinstance(file, (bytes, bytearray)) else file.read()
     original = _load(raw)
     cleaned = remove_silence(
         original,
         keep_ms=int(float(keep_silence_s) * 1000),
         thresh_db=float(silence_thresh_db),
     )
     if do_normalize:
         cleaned = normalize_lufs(cleaned, -14.0)
     if mode == "trim" and target_seconds:
         final = trim_to_seconds(cleaned, target_seconds)
         out = _export(final, fmt)
     return zbuf
 def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
     """Write ``blob`` to a new temporary file and return its path.

     The file is created with ``delete=False`` and a ``.<fmt>`` suffix, so it
     remains on disk after this function returns; nothing here removes it.

     Args:
         blob: In-memory buffer whose full contents are written out.
         fmt: Format name used as the file extension (without the dot).

     Returns:
         Path of the temporary file.
     """
     # The context manager flushes and closes the handle even when the write
     # raises, so a failed write no longer leaks an open file descriptor.
     with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as tf:
         tf.write(blob.getvalue())
     return tf.name
+# ---------- UI (minimal + custom filename) ----------
+css = """
+.gradio-container { max-width: 880px !important; margin: auto !important; }
+"""
+with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
+    gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.\n\n**Tip:** Set a custom file name below for your download.")
     with gr.Row():
         with gr.Column():
+            files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
             mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
+            target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
+            keep = gr.Number(value=0.25, label="Set pause length (seconds)")
+            # NEW: custom filename stem (no extension)
+            out_name = gr.Textbox(value="voiceover", label="Output filename (no extension)")
+            with gr.Accordion("Advanced (optional)", open=False):
+                min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
+                thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
+                do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
             fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
+            go = gr.Button("Process", variant="primary")
         with gr.Column():
             preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
             zip_out = gr.File(label="Download ZIP (if multiple)")
             rep = gr.Textbox(label="Report", lines=1)
+    def run(files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt):
         files = files or []
         if not files:
             return None, None, None, "Please upload at least one audio file."
+        # process first file
         single_blob, report = process_single(
             open(files[0], "rb"),
             mode=mode, target_seconds=target, keep_silence_s=keep,
         preview_path = write_temp_for_preview(single_blob, fmt)
         if len(files) == 1:
+            # return a file path with the requested name + extension
+            stem = sanitize_filename(out_name, default_stem="output")
+            out_path = write_temp_file_with_name(single_blob, stem, fmt)
+            return preview_path, out_path, None, report
         else:
+            # multi → zip (named after chosen stem)
             opened = [open(p, "rb") for p in files]
             zipped = process_batch(
                 opened, mode=mode, target_seconds=target, keep_silence_s=keep,
                 min_silence_ms=min_sil, silence_thresh_db=thresh,
                 do_normalize=do_norm, fmt=fmt
             )
+            zip_stem = sanitize_filename(out_name, default_stem="batch_output")
+            zip_path = write_temp_file_with_name(zipped, f"{zip_stem}_batch", "zip")
+            return preview_path, None, zip_path, report
     go.click(
         run,
+        [files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt],
         [preview, direct, zip_out, rep]
     )