lifesee committed on
Commit
ab32289
·
verified ·
1 Parent(s): 35d8a2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -29
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import io, os, uuid, zipfile, tempfile, subprocess
3
  from pydub import AudioSegment
4
  from pydub.silence import split_on_silence
5
 
@@ -17,7 +17,7 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
17
  buf.seek(0)
18
  return buf
19
 
20
- def remove_silence(seg: AudioSegment, keep_ms=50, min_silence_ms=100, thresh_db=-45):
21
  chunks = split_on_silence(
22
  seg,
23
  min_silence_len=int(min_silence_ms),
@@ -33,7 +33,6 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
33
  return seg + AudioSegment.silent(duration=t_ms - len(seg))
34
 
35
  def _atempo_chain(factor: float) -> str:
36
- # Build a chain so each step stays within [0.5, 2.0] for better quality.
37
  steps = []
38
  f = max(0.1, min(10.0, float(factor)))
39
  while f < 0.5:
@@ -59,20 +58,39 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
59
  return io.BytesIO(f.read())
60
 
61
  def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
62
- # Lightweight perceived normalization using RMS (keeps deps minimal).
63
  import math
64
  rms = seg.rms or 1
65
  current_db = 20 * math.log10(rms / (1 << 15))
66
  gain_db = float(target_lufs) - current_db
67
  return seg.apply_gain(gain_db)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # ---------- processors ----------
70
  def process_single(file, mode, target_seconds, keep_silence_s,
71
  min_silence_ms, silence_thresh_db, do_normalize, fmt):
72
  raw = file if isinstance(file, (bytes, bytearray)) else file.read()
73
  original = _load(raw)
74
 
75
- # 1) optional silence removal / pause control
76
  cleaned = remove_silence(
77
  original,
78
  keep_ms=int(float(keep_silence_s) * 1000),
@@ -80,11 +98,9 @@ def process_single(file, mode, target_seconds, keep_silence_s,
80
  thresh_db=float(silence_thresh_db),
81
  )
82
 
83
- # 2) optional loudness normalize
84
  if do_normalize:
85
  cleaned = normalize_lufs(cleaned, -14.0)
86
 
87
- # 3) timing mode
88
  if mode == "trim" and target_seconds:
89
  final = trim_to_seconds(cleaned, target_seconds)
90
  out = _export(final, fmt)
@@ -110,30 +126,37 @@ def process_batch(files, **kwargs) -> io.BytesIO:
110
  return zbuf
111
 
112
  def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
113
- # Gradio Audio preview works great with a file path; write a temp file.
114
  tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
115
  tf.write(blob.getvalue())
116
  tf.flush(); tf.close()
117
  return tf.name
118
 
119
- # ---------- UI ----------
120
- with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
121
- gr.Markdown(
122
- "### Remove or normalize pauses, **set pause length**, **trim to exact time**, or **fit length (pitch preserved)**.\n"
123
- "_Outputs: mp3 / wav / m4a / ogg. Single file → direct download. Multiple files → ZIP._"
124
- )
 
125
 
126
  with gr.Row():
127
  with gr.Column():
128
- files = gr.Files(label="Upload audio (one or many)", file_types=["audio"], type="filepath")
 
129
  mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
130
- target = gr.Number(value=30, label="Target seconds (for trim/fit)")
131
- keep = gr.Number(value=0.25, label="Set pause length (seconds kept at cuts)")
132
- min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
133
- thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
134
- do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
 
 
 
 
 
 
135
  fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
136
- go = gr.Button("Process")
137
 
138
  with gr.Column():
139
  preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
@@ -141,12 +164,12 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
141
  zip_out = gr.File(label="Download ZIP (if multiple)")
142
  rep = gr.Textbox(label="Report", lines=1)
143
 
144
- def run(files, mode, target, keep, min_sil, thresh, do_norm, fmt):
145
  files = files or []
146
  if not files:
147
  return None, None, None, "Please upload at least one audio file."
148
 
149
- # Process first file for preview & (if single) for direct download
150
  single_blob, report = process_single(
151
  open(files[0], "rb"),
152
  mode=mode, target_seconds=target, keep_silence_s=keep,
@@ -156,22 +179,25 @@ with gr.Blocks(title="AI Voice Studio – Pause Control, Trim, Fit") as demo:
156
  preview_path = write_temp_for_preview(single_blob, fmt)
157
 
158
  if len(files) == 1:
159
- # Direct download for single file
160
- direct_file = single_blob
161
- return preview_path, direct_file, None, report
 
162
  else:
163
- # ZIP for multiple files
164
  opened = [open(p, "rb") for p in files]
165
  zipped = process_batch(
166
  opened, mode=mode, target_seconds=target, keep_silence_s=keep,
167
  min_silence_ms=min_sil, silence_thresh_db=thresh,
168
  do_normalize=do_norm, fmt=fmt
169
  )
170
- return preview_path, None, zipped, report
 
 
171
 
172
  go.click(
173
  run,
174
- [files, mode, target, keep, min_sil, thresh, do_norm, fmt],
175
  [preview, direct, zip_out, rep]
176
  )
177
 
 
1
  import gradio as gr
2
+ import io, os, re, uuid, zipfile, tempfile, subprocess
3
  from pydub import AudioSegment
4
  from pydub.silence import split_on_silence
5
 
 
17
  buf.seek(0)
18
  return buf
19
 
20
+ def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
21
  chunks = split_on_silence(
22
  seg,
23
  min_silence_len=int(min_silence_ms),
 
33
  return seg + AudioSegment.silent(duration=t_ms - len(seg))
34
 
35
  def _atempo_chain(factor: float) -> str:
 
36
  steps = []
37
  f = max(0.1, min(10.0, float(factor)))
38
  while f < 0.5:
 
58
  return io.BytesIO(f.read())
59
 
60
  def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
 
61
  import math
62
  rms = seg.rms or 1
63
  current_db = 20 * math.log10(rms / (1 << 15))
64
  gain_db = float(target_lufs) - current_db
65
  return seg.apply_gain(gain_db)
66
 
67
+ def sanitize_filename(name: str, default_stem="output"):
68
+ name = (name or "").strip()
69
+ # keep only letters, digits, spaces, dots, dashes and underscores; then collapse each run of spaces into a single underscore
70
+ safe = re.sub(r"[^A-Za-z0-9 _.-]", "", name)
71
+ safe = re.sub(r"\s+", "_", safe)
72
+ return safe or default_stem
73
+
74
+ def write_temp_file_with_name(blob: io.BytesIO, stem: str, ext: str) -> str:
75
+ stem = sanitize_filename(stem)
76
+ ext = ext.lower().strip(".")
77
+ tf = tempfile.NamedTemporaryFile(delete=False, prefix=f"{stem}_", suffix=f".{ext}")
78
+ tf.write(blob.getvalue())
79
+ tf.flush(); tf.close()
80
+ # Rename to the exact requested stem if possible (NamedTemporaryFile inserts random characters between the prefix and the suffix)
81
+ exact_path = os.path.join(os.path.dirname(tf.name), f"{stem}.{ext}")
82
+ try:
83
+ os.replace(tf.name, exact_path)
84
+ return exact_path
85
+ except Exception:
86
+ return tf.name # fallback
87
+
88
  # ---------- processors ----------
89
  def process_single(file, mode, target_seconds, keep_silence_s,
90
  min_silence_ms, silence_thresh_db, do_normalize, fmt):
91
  raw = file if isinstance(file, (bytes, bytearray)) else file.read()
92
  original = _load(raw)
93
 
 
94
  cleaned = remove_silence(
95
  original,
96
  keep_ms=int(float(keep_silence_s) * 1000),
 
98
  thresh_db=float(silence_thresh_db),
99
  )
100
 
 
101
  if do_normalize:
102
  cleaned = normalize_lufs(cleaned, -14.0)
103
 
 
104
  if mode == "trim" and target_seconds:
105
  final = trim_to_seconds(cleaned, target_seconds)
106
  out = _export(final, fmt)
 
126
  return zbuf
127
 
128
  def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
 
129
  tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
130
  tf.write(blob.getvalue())
131
  tf.flush(); tf.close()
132
  return tf.name
133
 
134
+ # ---------- UI (minimal + custom filename) ----------
135
+ css = """
136
+ .gradio-container { max-width: 880px !important; margin: auto !important; }
137
+ """
138
+
139
+ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
140
+ gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.\n\n**Tip:** Set a custom file name below for your download.")
141
 
142
  with gr.Row():
143
  with gr.Column():
144
+ files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
145
+
146
  mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
147
+ target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
148
+ keep = gr.Number(value=0.25, label="Set pause length (seconds)")
149
+
150
+ # NEW: custom filename stem (no extension)
151
+ out_name = gr.Textbox(value="voiceover", label="Output filename (no extension)")
152
+
153
+ with gr.Accordion("Advanced (optional)", open=False):
154
+ min_sil = gr.Slider(50, 1000, 120, step=10, label="Count a pause if silence ≥ (ms)")
155
+ thresh = gr.Slider(-80, -10, -45, step=1, label="Silence threshold (dBFS)")
156
+ do_norm = gr.Checkbox(True, label="Normalize loudness (~-14 LUFS)")
157
+
158
  fmt = gr.Dropdown(["mp3","wav","m4a","ogg"], value="mp3", label="Output format")
159
+ go = gr.Button("Process", variant="primary")
160
 
161
  with gr.Column():
162
  preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
 
164
  zip_out = gr.File(label="Download ZIP (if multiple)")
165
  rep = gr.Textbox(label="Report", lines=1)
166
 
167
+ def run(files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt):
168
  files = files or []
169
  if not files:
170
  return None, None, None, "Please upload at least one audio file."
171
 
172
+ # process first file
173
  single_blob, report = process_single(
174
  open(files[0], "rb"),
175
  mode=mode, target_seconds=target, keep_silence_s=keep,
 
179
  preview_path = write_temp_for_preview(single_blob, fmt)
180
 
181
  if len(files) == 1:
182
+ # return a file path with the requested name + extension
183
+ stem = sanitize_filename(out_name, default_stem="output")
184
+ out_path = write_temp_file_with_name(single_blob, stem, fmt)
185
+ return preview_path, out_path, None, report
186
  else:
187
+ # multi zip (named after chosen stem)
188
  opened = [open(p, "rb") for p in files]
189
  zipped = process_batch(
190
  opened, mode=mode, target_seconds=target, keep_silence_s=keep,
191
  min_silence_ms=min_sil, silence_thresh_db=thresh,
192
  do_normalize=do_norm, fmt=fmt
193
  )
194
+ zip_stem = sanitize_filename(out_name, default_stem="batch_output")
195
+ zip_path = write_temp_file_with_name(zipped, f"{zip_stem}_batch", "zip")
196
+ return preview_path, None, zip_path, report
197
 
198
  go.click(
199
  run,
200
+ [files, mode, target, keep, min_sil, thresh, do_norm, out_name, fmt],
201
  [preview, direct, zip_out, rep]
202
  )
203