Spaces:

lifesee
/

VoiceoverStudio

Running

App Files Files Community

lifesee committed on Aug 24, 2025

Commit

89f7924

verified ·

1 Parent(s): 8593d59

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -10

app.py CHANGED Viewed

@@ -18,6 +18,11 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
     return buf
 def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
     chunks = split_on_silence(
         seg,
         min_silence_len=int(min_silence_ms),
@@ -30,9 +35,11 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
     t_ms = max(0, int(float(target_s) * 1000))
     if len(seg) >= t_ms:
         return seg[:t_ms]
     return seg + AudioSegment.silent(duration=t_ms - len(seg))
 def _atempo_chain(factor: float) -> str:
     steps = []
     f = max(0.1, min(10.0, float(factor)))
     while f < 0.5:
@@ -43,6 +50,7 @@ def _atempo_chain(factor: float) -> str:
     return ",".join([f"atempo={s:.5f}" for s in steps])
 def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
     with tempfile.TemporaryDirectory() as d:
         inp = os.path.join(d, "in.wav")
         outp = os.path.join(d, f"out.{fmt_out}")
@@ -57,6 +65,7 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
             return io.BytesIO(f.read())
 def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
     import math
     rms = seg.rms or 1
     current_db = 20 * math.log10(rms / (1 << 15))
@@ -69,6 +78,7 @@ def process_single(file, mode, target_seconds, keep_silence_s,
     raw = file if isinstance(file, (bytes, bytearray)) else file.read()
     original = _load(raw)
     cleaned = remove_silence(
         original,
         keep_ms=int(float(keep_silence_s) * 1000),
@@ -76,9 +86,11 @@ def process_single(file, mode, target_seconds, keep_silence_s,
         thresh_db=float(silence_thresh_db),
     )
     if do_normalize:
         cleaned = normalize_lufs(cleaned, -14.0)
     if mode == "trim" and target_seconds:
         final = trim_to_seconds(cleaned, target_seconds)
         out = _export(final, fmt)
@@ -104,24 +116,48 @@ def process_batch(files, **kwargs) -> io.BytesIO:
     return zbuf
 def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
     """Write the bytes held by ``blob`` to a new temporary file and return its path.

     The file is created with ``delete=False`` and a ``.{fmt}`` suffix, so it
     is not removed when the handle is closed; nothing in this function
     deletes it afterwards.
     """
     tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
     # getvalue() returns the buffer's full contents regardless of its current position.
     tf.write(blob.getvalue())
     # Flush and close the handle before the path is handed back.
     tf.flush(); tf.close()
     return tf.name
-# ---------- UI (two-column, compact) ----------
-css = """
-.gradio-container { max-width: 1100px !important; margin: auto !important; }
 """
-with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
-    gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
-    with gr.Row():
         # Left column: controls
-        with gr.Column(scale=1):
             files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
-            mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode", elem_id="mode")
             target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
             keep = gr.Number(value=0.25, label="Set pause length (seconds)")
@@ -134,8 +170,8 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
             go = gr.Button("Process", variant="primary")
         # Right column: outputs
-        with gr.Column(scale=1):
-            preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
             direct = gr.File(label="Download processed file (single)")
             zip_out = gr.File(label="Download ZIP (if multiple)")
             rep = gr.Textbox(label="Report", lines=1)
@@ -145,6 +181,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
         if not files:
             return None, None, None, "Please upload at least one audio file."
         single_blob, report = process_single(
             open(files[0], "rb"),
             mode=mode, target_seconds=target, keep_silence_s=keep,
@@ -164,6 +201,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
             )
             return preview_path, None, zipped, report
     go.click(
         run,
         [files, mode, target, keep, min_sil, thresh, do_norm, fmt],

     return buf
 def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
+    """
+    keep_ms: how much silence to keep at each cut (your final pause length)
+    min_silence_ms: only treat silence >= this length as a pause
+    thresh_db: what counts as "silence" (in dBFS), e.g., -45 for voiceovers
+    """
     chunks = split_on_silence(
         seg,
         min_silence_len=int(min_silence_ms),
     t_ms = max(0, int(float(target_s) * 1000))
     if len(seg) >= t_ms:
         return seg[:t_ms]
+    # pad if shorter
     return seg + AudioSegment.silent(duration=t_ms - len(seg))
 def _atempo_chain(factor: float) -> str:
+    # Split large/small adjustments into steps within [0.5, 2.0] for quality
     steps = []
     f = max(0.1, min(10.0, float(factor)))
     while f < 0.5:
     return ",".join([f"atempo={s:.5f}" for s in steps])
 def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
+    """Pitch-preserving time stretch via FFmpeg atempo."""
     with tempfile.TemporaryDirectory() as d:
         inp = os.path.join(d, "in.wav")
         outp = os.path.join(d, f"out.{fmt_out}")
             return io.BytesIO(f.read())
 def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
+    # Lightweight RMS-based normalization (minimal deps)
     import math
     rms = seg.rms or 1
     current_db = 20 * math.log10(rms / (1 << 15))
     raw = file if isinstance(file, (bytes, bytearray)) else file.read()
     original = _load(raw)
+    # 1) pause cleanup / normalization
     cleaned = remove_silence(
         original,
         keep_ms=int(float(keep_silence_s) * 1000),
         thresh_db=float(silence_thresh_db),
     )
+    # 2) loudness normalize
     if do_normalize:
         cleaned = normalize_lufs(cleaned, -14.0)
+    # 3) timing
     if mode == "trim" and target_seconds:
         final = trim_to_seconds(cleaned, target_seconds)
         out = _export(final, fmt)
     return zbuf
 def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
     """Write the bytes held by ``blob`` to a new temporary file and return its path.

     The file is created with ``delete=False`` and a ``.{fmt}`` suffix, so it
     is not removed when the handle is closed; nothing in this function
     deletes it afterwards.
     """
+    # Gradio audio prefers a file path for the preview widget
     tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
     # getvalue() returns the buffer's full contents regardless of its current position.
     tf.write(blob.getvalue())
     # Flush and close the handle before the path is handed back.
     tf.flush(); tf.close()
     return tf.name
+# ---------- UI (force two-column, compact) ----------
+CSS = """
+/* wider canvas */
+.gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding: 8px 10px !important; }
+/* force two columns with sane minimums */
+#twocol {
+  display: grid;
+  grid-template-columns: minmax(320px, 1fr) minmax(320px, 1fr);
+  gap: 12px;
+  align-items: start;
+}
+/* tighten component spacing */
+#twocol .block, #twocol .form, #twocol .gap { gap: 8px !important; }
+#twocol .gr-button { height: 40px; }
+#twocol .gr-number input { height: 36px; }
+#twocol .gr-textbox textarea { min-height: 40px; }
+/* compact audio bar */
+#preview-audio audio { width: 100%; height: 36px; }
+/* Only stack on very small screens */
+@media (max-width: 600px) {
+  #twocol { grid-template-columns: 1fr; }
+}
 """
+with gr.Blocks(title="AI Voice Studio – Simple", css=CSS) as demo:
+    gr.Markdown("### AI Voice Studio — Set pause length; optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
+    with gr.Row(elem_id="twocol"):
         # Left column: controls
+        with gr.Column():
             files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
+            mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
             target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
             keep = gr.Number(value=0.25, label="Set pause length (seconds)")
             go = gr.Button("Process", variant="primary")
         # Right column: outputs
+        with gr.Column():
+            preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False, elem_id="preview-audio")
             direct = gr.File(label="Download processed file (single)")
             zip_out = gr.File(label="Download ZIP (if multiple)")
             rep = gr.Textbox(label="Report", lines=1)
         if not files:
             return None, None, None, "Please upload at least one audio file."
+        # process first file (preview + single download)
         single_blob, report = process_single(
             open(files[0], "rb"),
             mode=mode, target_seconds=target, keep_silence_s=keep,
             )
             return preview_path, None, zipped, report
+    # wire UI
     go.click(
         run,
         [files, mode, target, keep, min_sil, thresh, do_norm, fmt],