lifesee committed on
Commit
89f7924
·
verified ·
1 Parent(s): 8593d59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -10
app.py CHANGED
@@ -18,6 +18,11 @@ def _export(seg: AudioSegment, fmt="mp3") -> io.BytesIO:
18
  return buf
19
 
20
  def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
 
 
 
 
 
21
  chunks = split_on_silence(
22
  seg,
23
  min_silence_len=int(min_silence_ms),
@@ -30,9 +35,11 @@ def trim_to_seconds(seg: AudioSegment, target_s: float):
30
  t_ms = max(0, int(float(target_s) * 1000))
31
  if len(seg) >= t_ms:
32
  return seg[:t_ms]
 
33
  return seg + AudioSegment.silent(duration=t_ms - len(seg))
34
 
35
  def _atempo_chain(factor: float) -> str:
 
36
  steps = []
37
  f = max(0.1, min(10.0, float(factor)))
38
  while f < 0.5:
@@ -43,6 +50,7 @@ def _atempo_chain(factor: float) -> str:
43
  return ",".join([f"atempo={s:.5f}" for s in steps])
44
 
45
  def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
 
46
  with tempfile.TemporaryDirectory() as d:
47
  inp = os.path.join(d, "in.wav")
48
  outp = os.path.join(d, f"out.{fmt_out}")
@@ -57,6 +65,7 @@ def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.Byte
57
  return io.BytesIO(f.read())
58
 
59
  def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
 
60
  import math
61
  rms = seg.rms or 1
62
  current_db = 20 * math.log10(rms / (1 << 15))
@@ -69,6 +78,7 @@ def process_single(file, mode, target_seconds, keep_silence_s,
69
  raw = file if isinstance(file, (bytes, bytearray)) else file.read()
70
  original = _load(raw)
71
 
 
72
  cleaned = remove_silence(
73
  original,
74
  keep_ms=int(float(keep_silence_s) * 1000),
@@ -76,9 +86,11 @@ def process_single(file, mode, target_seconds, keep_silence_s,
76
  thresh_db=float(silence_thresh_db),
77
  )
78
 
 
79
  if do_normalize:
80
  cleaned = normalize_lufs(cleaned, -14.0)
81
 
 
82
  if mode == "trim" and target_seconds:
83
  final = trim_to_seconds(cleaned, target_seconds)
84
  out = _export(final, fmt)
@@ -104,24 +116,48 @@ def process_batch(files, **kwargs) -> io.BytesIO:
104
  return zbuf
105
 
106
  def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
 
107
  tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
108
  tf.write(blob.getvalue())
109
  tf.flush(); tf.close()
110
  return tf.name
111
 
112
- # ---------- UI (two-column, compact) ----------
113
- css = """
114
- .gradio-container { max-width: 1100px !important; margin: auto !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  """
116
 
117
- with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
118
- gr.Markdown("## AI Voice Studio\nSet pause length. Optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
119
 
120
- with gr.Row():
121
  # Left column: controls
122
- with gr.Column(scale=1):
123
  files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
124
- mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode", elem_id="mode")
125
  target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
126
  keep = gr.Number(value=0.25, label="Set pause length (seconds)")
127
 
@@ -134,8 +170,8 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
134
  go = gr.Button("Process", variant="primary")
135
 
136
  # Right column: outputs
137
- with gr.Column(scale=1):
138
- preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False)
139
  direct = gr.File(label="Download processed file (single)")
140
  zip_out = gr.File(label="Download ZIP (if multiple)")
141
  rep = gr.Textbox(label="Report", lines=1)
@@ -145,6 +181,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
145
  if not files:
146
  return None, None, None, "Please upload at least one audio file."
147
 
 
148
  single_blob, report = process_single(
149
  open(files[0], "rb"),
150
  mode=mode, target_seconds=target, keep_silence_s=keep,
@@ -164,6 +201,7 @@ with gr.Blocks(title="AI Voice Studio – Simple", css=css) as demo:
164
  )
165
  return preview_path, None, zipped, report
166
 
 
167
  go.click(
168
  run,
169
  [files, mode, target, keep, min_sil, thresh, do_norm, fmt],
 
18
  return buf
19
 
20
  def remove_silence(seg: AudioSegment, keep_ms=250, min_silence_ms=120, thresh_db=-45):
21
+ """
22
+ keep_ms: how much silence to keep at each cut (your final pause length)
23
+ min_silence_ms: only treat silence >= this length as a pause
24
+ thresh_db: what counts as "silence" (in dBFS), e.g., -45 for voiceovers
25
+ """
26
  chunks = split_on_silence(
27
  seg,
28
  min_silence_len=int(min_silence_ms),
 
35
  t_ms = max(0, int(float(target_s) * 1000))
36
  if len(seg) >= t_ms:
37
  return seg[:t_ms]
38
+ # pad if shorter
39
  return seg + AudioSegment.silent(duration=t_ms - len(seg))
40
 
41
  def _atempo_chain(factor: float) -> str:
42
+ # Split large/small adjustments into steps within [0.5, 2.0] for quality
43
  steps = []
44
  f = max(0.1, min(10.0, float(factor)))
45
  while f < 0.5:
 
50
  return ",".join([f"atempo={s:.5f}" for s in steps])
51
 
52
  def fit_to_seconds(seg: AudioSegment, target_s: float, fmt_out="mp3") -> io.BytesIO:
53
+ """Pitch-preserving time stretch via FFmpeg atempo."""
54
  with tempfile.TemporaryDirectory() as d:
55
  inp = os.path.join(d, "in.wav")
56
  outp = os.path.join(d, f"out.{fmt_out}")
 
65
  return io.BytesIO(f.read())
66
 
67
  def normalize_lufs(seg: AudioSegment, target_lufs=-14.0):
68
+ # Lightweight RMS-based normalization (minimal deps)
69
  import math
70
  rms = seg.rms or 1
71
  current_db = 20 * math.log10(rms / (1 << 15))
 
78
  raw = file if isinstance(file, (bytes, bytearray)) else file.read()
79
  original = _load(raw)
80
 
81
+ # 1) pause cleanup / normalization
82
  cleaned = remove_silence(
83
  original,
84
  keep_ms=int(float(keep_silence_s) * 1000),
 
86
  thresh_db=float(silence_thresh_db),
87
  )
88
 
89
+ # 2) loudness normalize
90
  if do_normalize:
91
  cleaned = normalize_lufs(cleaned, -14.0)
92
 
93
+ # 3) timing
94
  if mode == "trim" and target_seconds:
95
  final = trim_to_seconds(cleaned, target_seconds)
96
  out = _export(final, fmt)
 
116
  return zbuf
117
 
118
  def write_temp_for_preview(blob: io.BytesIO, fmt: str) -> str:
119
+ # Gradio audio prefers a file path for the preview widget
120
  tf = tempfile.NamedTemporaryFile(delete=False, suffix=f".{fmt}")
121
  tf.write(blob.getvalue())
122
  tf.flush(); tf.close()
123
  return tf.name
124
 
125
+ # ---------- UI (force two-column, compact) ----------
126
+ CSS = """
127
+ /* wider canvas */
128
+ .gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding: 8px 10px !important; }
129
+
130
+ /* force two columns with sane minimums */
131
+ #twocol {
132
+ display: grid;
133
+ grid-template-columns: minmax(320px, 1fr) minmax(320px, 1fr);
134
+ gap: 12px;
135
+ align-items: start;
136
+ }
137
+
138
+ /* tighten component spacing */
139
+ #twocol .block, #twocol .form, #twocol .gap { gap: 8px !important; }
140
+ #twocol .gr-button { height: 40px; }
141
+ #twocol .gr-number input { height: 36px; }
142
+ #twocol .gr-textbox textarea { min-height: 40px; }
143
+
144
+ /* compact audio bar */
145
+ #preview-audio audio { width: 100%; height: 36px; }
146
+
147
+ /* Only stack on very small screens */
148
+ @media (max-width: 600px) {
149
+ #twocol { grid-template-columns: 1fr; }
150
+ }
151
  """
152
 
153
+ with gr.Blocks(title="AI Voice Studio – Simple", css=CSS) as demo:
154
+ gr.Markdown("### AI Voice Studio — Set pause length; optionally **Trim** or **Fit** to exact time. Export MP3/WAV/M4A/OGG.")
155
 
156
+ with gr.Row(elem_id="twocol"):
157
  # Left column: controls
158
+ with gr.Column():
159
  files = gr.Files(label="Upload audio", file_types=["audio"], type="filepath")
160
+ mode = gr.Radio(["none", "trim", "fit"], value="none", label="Timing mode")
161
  target = gr.Number(value=30, label="Target seconds (used for trim/fit)")
162
  keep = gr.Number(value=0.25, label="Set pause length (seconds)")
163
 
 
170
  go = gr.Button("Process", variant="primary")
171
 
172
  # Right column: outputs
173
+ with gr.Column():
174
+ preview = gr.Audio(label="Preview (first file)", type="filepath", interactive=False, elem_id="preview-audio")
175
  direct = gr.File(label="Download processed file (single)")
176
  zip_out = gr.File(label="Download ZIP (if multiple)")
177
  rep = gr.Textbox(label="Report", lines=1)
 
181
  if not files:
182
  return None, None, None, "Please upload at least one audio file."
183
 
184
+ # process first file (preview + single download)
185
  single_blob, report = process_single(
186
  open(files[0], "rb"),
187
  mode=mode, target_seconds=target, keep_silence_s=keep,
 
201
  )
202
  return preview_path, None, zipped, report
203
 
204
+ # wire UI
205
  go.click(
206
  run,
207
  [files, mode, target, keep, min_sil, thresh, do_norm, fmt],