Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,6 +18,11 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
|
|
| 18 |
return buf
|
| 19 |
|
| 20 |
def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
chunks = split_on_silence(
|
| 22 |
seg,
|
| 23 |
min_silence_len=int(min_silence_ms),
|
|
@@ -30,9 +35,11 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
|
|
| 30 |
t_ms = max(0, int(float(target_s) * 1000))
|
| 31 |
if len(seg) >= t_ms:
|
| 32 |
return seg[:t_ms]
|
|
|
|
| 33 |
return seg + AudioSegment.silent(duration=t_ms - len(seg))
|
| 34 |
|
| 35 |
def _atempo_chain(factor: float) -> str:
|
|
|
|
| 36 |
steps = []
|
| 37 |
f = max(0.1, min(10.0, float(factor)))
|
| 38 |
while f < 0.5:
|
|
@@ -43,6 +50,7 @@ def _atempo_chain(factor: float) -> str:
|
|
| 43 |
return ",".join([f"atempo={s:.5f}" for s in steps])
|
| 44 |
|
| 45 |
def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
|
|
|
|
| 46 |
with tempfile.TemporaryDirectory() as d:
|
| 47 |
inp = os.path.join(d, "in.wav")
|
| 48 |
outp = os.path.join(d, f"out.{fmt_out}")
|
|
@@ -57,6 +65,7 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
|
|
| 57 |
return io.BytesIO(f.read())
|
| 58 |
|
| 59 |
def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
|
|
|
|
| 60 |
import math
|
| 61 |
rms = seg.rms or 1
|
| 62 |
current_db = 20 * math.log10(rms / (1 << 15))
|
|
@@ -69,6 +78,7 @@ def process_single(file, mode, target_seconds, keep_silence_s,
|
|
| 69 |
raw = file if isinstance(file, (bytes, bytearray)) else file.read()
|
| 70 |
original = _load(raw)
|
| 71 |
|
|
|
|
| 72 |
cleaned = remove_silence(
|
| 73 |
original,
|
| 74 |
keep_ms=int(float(keep_silence_s) * 1000),
|
|
@@ -76,9 +86,11 @@ def process_single(file, mode, target_seconds, keep_silence_s,
|
|
| 76 |
thresh_db=float(silence_thresh_db),
|
| 77 |
)
|
| 78 |
|
|
|
|
| 79 |
if do_normalize:
|
| 80 |
cleaned = normalize_lufs(cleaned, -14.0)
|
| 81 |
|
|
|
|
| 82 |
if mode == "trim" and target_seconds:
|
| 83 |
final = trim_to_seconds(cleaned, target_seconds)
|
| 84 |
out = _export(final, fmt)
|
|
@@ -104,24 +116,48 @@ def process_batch(files, **kwargs) -> io.BytesIO:
|
|
| 104 |
return zbuf
|
| 105 |
|
| 106 |
def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
    """Write the contents of ``blob`` to a temporary file and return its path.

    Args:
        blob: In-memory buffer whose full contents (``getvalue()``) are written.
        fmt: File extension without the leading dot; used as the file suffix.

    Returns:
        Path of the created file. The file is created with ``delete=False``,
        so it is not removed when closed and stays on disk after this call.
    """
    # The context manager flushes and closes the handle even if write() raises,
    # so a failed write no longer leaks an open file descriptor.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as tf:
        tf.write(blob.getvalue())
    return tf.name
|
| 111 |
|
| 112 |
-
# ---------- UI (two-column, compact) ----------
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
"""
|
| 116 |
|
| 117 |
-
with gr.Blocks(title="AI Voice Studio – Simple", css=
|
| 118 |
-
gr.Markdown("## AI Voice Studio
|
| 119 |
|
| 120 |
-
with gr.Row():
|
| 121 |
# Left column: controls
|
| 122 |
-
with gr.Column(
|
| 123 |
files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
|
| 124 |
-
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode"
|
| 125 |
target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
|
| 126 |
keep = gr.Number(value=0.25, label="Set pause length (seconds)")
|
| 127 |
|
|
@@ -134,8 +170,8 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
|
| 134 |
go = gr.Button("Process", variant="primary")
|
| 135 |
|
| 136 |
# Right column: outputs
|
| 137 |
-
with gr.Column(
|
| 138 |
-
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
|
| 139 |
direct = gr.File(label="Download processed file (single)")
|
| 140 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 141 |
rep = gr.Textbox(label="Report", lines=1)
|
|
@@ -145,6 +181,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
|
| 145 |
if not files:
|
| 146 |
return None, None, None, "Please upload at least one audio file."
|
| 147 |
|
|
|
|
| 148 |
single_blob, report = process_single(
|
| 149 |
open(files[0], "rb"),
|
| 150 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
@@ -164,6 +201,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
|
|
| 164 |
)
|
| 165 |
return preview_path, None, zipped, report
|
| 166 |
|
|
|
|
| 167 |
go.click(
|
| 168 |
run,
|
| 169 |
[files, mode, target, keep, min_sil, thresh, do_norm, fmt],
|
|
|
|
| 18 |
return buf
|
| 19 |
|
| 20 |
def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
|
| 21 |
+
"""
|
| 22 |
+
keep_ms: how much silence to keep at each cut (your final pause length)
|
| 23 |
+
min_silence_ms: only treat silence >= this length as a pause
|
| 24 |
+
thresh_db: what counts as "silence" (in dBFS), e.g., -45 for voiceovers
|
| 25 |
+
"""
|
| 26 |
chunks = split_on_silence(
|
| 27 |
seg,
|
| 28 |
min_silence_len=int(min_silence_ms),
|
|
|
|
| 35 |
t_ms = max(0, int(float(target_s) * 1000))
|
| 36 |
if len(seg) >= t_ms:
|
| 37 |
return seg[:t_ms]
|
| 38 |
+
# pad if shorter
|
| 39 |
return seg + AudioSegment.silent(duration=t_ms - len(seg))
|
| 40 |
|
| 41 |
def _atempo_chain(factor: float) -> str:
|
| 42 |
+
# Split large/small adjustments into steps within [0.5, 2.0] for quality
|
| 43 |
steps = []
|
| 44 |
f = max(0.1, min(10.0, float(factor)))
|
| 45 |
while f < 0.5:
|
|
|
|
| 50 |
return ",".join([f"atempo={s:.5f}" for s in steps])
|
| 51 |
|
| 52 |
def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
|
| 53 |
+
"""Pitch-preserving time stretch via FFmpeg atempo."""
|
| 54 |
with tempfile.TemporaryDirectory() as d:
|
| 55 |
inp = os.path.join(d, "in.wav")
|
| 56 |
outp = os.path.join(d, f"out.{fmt_out}")
|
|
|
|
| 65 |
return io.BytesIO(f.read())
|
| 66 |
|
| 67 |
def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
|
| 68 |
+
# Lightweight RMS-based normalization (minimal deps)
|
| 69 |
import math
|
| 70 |
rms = seg.rms or 1
|
| 71 |
current_db = 20 * math.log10(rms / (1 << 15))
|
|
|
|
| 78 |
raw = file if isinstance(file, (bytes, bytearray)) else file.read()
|
| 79 |
original = _load(raw)
|
| 80 |
|
| 81 |
+
# 1) pause cleanup / normalization
|
| 82 |
cleaned = remove_silence(
|
| 83 |
original,
|
| 84 |
keep_ms=int(float(keep_silence_s) * 1000),
|
|
|
|
| 86 |
thresh_db=float(silence_thresh_db),
|
| 87 |
)
|
| 88 |
|
| 89 |
+
# 2) loudness normalize
|
| 90 |
if do_normalize:
|
| 91 |
cleaned = normalize_lufs(cleaned, -14.0)
|
| 92 |
|
| 93 |
+
# 3) timing
|
| 94 |
if mode == "trim" and target_seconds:
|
| 95 |
final = trim_to_seconds(cleaned, target_seconds)
|
| 96 |
out = _export(final, fmt)
|
|
|
|
| 116 |
return zbuf
|
| 117 |
|
| 118 |
def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
    """Write the contents of ``blob`` to a temporary file and return its path.

    Args:
        blob: In-memory buffer whose full contents (``getvalue()``) are written.
        fmt: File extension without the leading dot; used as the file suffix.

    Returns:
        Path of the created file. The file is created with ``delete=False``,
        so it is not removed when closed and stays on disk after this call.
    """
    # Gradio audio prefers a file path for the preview widget
    # The context manager flushes and closes the handle even if write() raises,
    # so a failed write no longer leaks an open file descriptor.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}") as tf:
        tf.write(blob.getvalue())
    return tf.name
|
| 124 |
|
| 125 |
+
# ---------- UI (force two-column, compact) ----------
|
| 126 |
+
CSS = """
|
| 127 |
+
/* wider canvas */
|
| 128 |
+
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding: 8px 10px !important; }
|
| 129 |
+
|
| 130 |
+
/* force two columns with sane minimums */
|
| 131 |
+
#twocol {
|
| 132 |
+
display: grid;
|
| 133 |
+
grid-template-columns: minmax(320px, 1fr) minmax(320px, 1fr);
|
| 134 |
+
gap: 12px;
|
| 135 |
+
align-items: start;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
/* tighten component spacing */
|
| 139 |
+
#twocol .block, #twocol .form, #twocol .gap { gap: 8px !important; }
|
| 140 |
+
#twocol .gr-button { height: 40px; }
|
| 141 |
+
#twocol .gr-number input { height: 36px; }
|
| 142 |
+
#twocol .gr-textbox textarea { min-height: 40px; }
|
| 143 |
+
|
| 144 |
+
/* compact audio bar */
|
| 145 |
+
#preview-audio audio { width: 100%; height: 36px; }
|
| 146 |
+
|
| 147 |
+
/* Only stack on very small screens */
|
| 148 |
+
@media (max-width: 600px) {
|
| 149 |
+
#twocol { grid-template-columns: 1fr; }
|
| 150 |
+
}
|
| 151 |
"""
|
| 152 |
|
| 153 |
+
with gr.Blocks(title="AI Voice Studio – Simple", css=CSS) as demo:
|
| 154 |
+
gr.Markdown("### AI Voice Studio — Set pause length; optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
|
| 155 |
|
| 156 |
+
with gr.Row(elem_id="twocol"):
|
| 157 |
# Left column: controls
|
| 158 |
+
with gr.Column():
|
| 159 |
files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
|
| 160 |
+
mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
|
| 161 |
target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
|
| 162 |
keep = gr.Number(value=0.25, label="Set pause length (seconds)")
|
| 163 |
|
|
|
|
| 170 |
go = gr.Button("Process", variant="primary")
|
| 171 |
|
| 172 |
# Right column: outputs
|
| 173 |
+
with gr.Column():
|
| 174 |
+
preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False, elem_id="preview-audio")
|
| 175 |
direct = gr.File(label="Download processed file (single)")
|
| 176 |
zip_out = gr.File(label="Download ZIP (if multiple)")
|
| 177 |
rep = gr.Textbox(label="Report", lines=1)
|
|
|
|
| 181 |
if not files:
|
| 182 |
return None, None, None, "Please upload at least one audio file."
|
| 183 |
|
| 184 |
+
# process first file (preview + single download)
|
| 185 |
single_blob, report = process_single(
|
| 186 |
open(files[0], "rb"),
|
| 187 |
mode=mode, target_seconds=target, keep_silence_s=keep,
|
|
|
|
| 201 |
)
|
| 202 |
return preview_path, None, zipped, report
|
| 203 |
|
| 204 |
+
# wire UI
|
| 205 |
go.click(
|
| 206 |
run,
|
| 207 |
[files, mode, target, keep, min_sil, thresh, do_norm, fmt],
|