staraks committed on
Commit
339fbba
·
verified ·
1 Parent(s): e166883

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -199
app.py CHANGED
@@ -1,151 +1,200 @@
 
1
  # Whisper Transcription Tool with .dct support and progress updates
2
- # Drop-in replacement for your app.py. Paste into your Hugging Face Space.
3
 
4
  from docx import Document
5
  import os
6
  import whisper
7
  import gradio as gr
8
  import pyzipper
9
- import glob
10
  import shutil
11
  import tempfile
 
12
  from pydub import AudioSegment
13
 
14
- # Load default model cache
15
  model_cache = {}
16
 
17
-
18
def save_as_word(text, filename="merged_transcripts.docx"):
    """Write ``text`` into a new Word document and return where it was saved.

    Args:
        text: Content placed into a single paragraph of the document.
        filename: Destination path handed to ``Document.save``; the default
            is a relative name.

    Returns:
        The ``filename`` argument, unchanged.
    """
    doc = Document()
    doc.add_paragraph(text)
    doc.save(filename)
    return filename
24
 
25
-
26
def convert_to_wav_if_needed(input_path):
    """
    Return the path of a WAV rendition of ``input_path``.

    A path that already ends in ``.wav`` (case-insensitive) is returned
    untouched. Otherwise pydub's ``AudioSegment.from_file`` is tried first;
    if that raises, the file is fed to the ``ffmpeg`` executable as raw audio
    using a short list of (format, sample rate, channel count) guesses until
    one of them exits with status 0 and leaves an output larger than 100 bytes.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        ``input_path`` itself for WAV inputs, otherwise the path of a newly
        created temporary ``.wav`` file that the caller is responsible for
        deleting.

    Raises:
        Exception: when neither pydub nor any ffmpeg guess produced a usable
            file. The message lists the pydub error and every ffmpeg failure,
            one per line.
    """
    import subprocess

    if input_path.lower().endswith('.wav'):
        return input_path

    def _new_tmp_wav():
        """Reserve a closed temporary ``.wav`` file and return its path."""
        handle = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        handle.close()
        return handle.name

    def _discard(path):
        """Best-effort removal of a temporary file."""
        try:
            os.unlink(path)
        except OSError:
            pass

    errors = []

    # 1) Let pydub auto-detect the container/codec (simplest path).
    candidate = _new_tmp_wav()
    try:
        AudioSegment.from_file(input_path).export(candidate, format='wav')
        return candidate
    except Exception as e_auto:
        _discard(candidate)
        # Keep the reason so the final error explains why step 1 failed too.
        errors.append(f"pydub auto-detection failed: {e_auto}")

    # 2) Fallback: interpret the file as raw audio with a few common layouts.
    guesses = [
        # fmt, sample_rate, channels
        ('s16le', 16000, 1),
        ('s16le', 8000, 1),
        ('u8', 8000, 1),
        ('s16le', 44100, 1),
        ('s16le', 16000, 2),
        ('adpcm_ima_wav', 8000, 1),
    ]

    for fmt, sr, ch in guesses:
        candidate = _new_tmp_wav()
        cmd = [
            'ffmpeg', '-y', '-f', fmt, '-ar', str(sr), '-ac', str(ch), '-i', input_path,
            candidate
        ]
        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        except Exception as e_run:
            errors.append(f"ffmpeg run failed for fmt={fmt},sr={sr},ch={ch}: {e_run}")
            _discard(candidate)
            continue

        if proc.returncode == 0 and os.path.exists(candidate) and os.path.getsize(candidate) > 100:
            return candidate

        err = proc.stderr or proc.stdout or 'no ffmpeg output'
        errors.append(f"fmt={fmt},sr={sr},ch={ch} -> rc={proc.returncode} -> {err}")
        _discard(candidate)

    # Nothing worked: surface everything that was tried.
    msg = (
        "Could not convert file to WAV. Tried pydub and several ffmpeg heuristics. Errors:\n"
        + "\n".join(errors)
    )
    raise Exception(msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
 
 
 
 
101
 
102
- def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
- Generator function for Gradio that yields progress updates.
105
- Outputs: (log_text, transcripts_text, word_file_path_or_None, percent_int)
 
 
 
 
 
 
106
  """
107
- # initial state
108
  log_outputs = []
109
  transcript_outputs_list = []
110
  word_file_path = None
111
  extracted_audio_paths = []
112
- temp_extract_dir = "/tmp/extracted_audio"
113
 
114
- # yield initial empty state (so UI shows up immediately)
115
  yield "", "", None, 0
116
 
117
- # cleanup any previous temp dir
118
  if os.path.exists(temp_extract_dir):
119
  try:
120
  shutil.rmtree(temp_extract_dir)
121
  log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
122
- except OSError as e:
123
  log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
124
 
125
- # If a zip is provided, extract supported audio files
126
  if zip_file:
127
  log_outputs.append(f"Processing zip file: {zip_file}")
128
  yield "\n\n".join(log_outputs), "", None, 2
129
  try:
 
130
  with pyzipper.ZipFile(zip_file, 'r') as zf:
131
  if zip_password:
132
  try:
133
  zf.setpassword(zip_password.encode())
134
- except RuntimeError:
135
  log_outputs.append("Error: Incorrect password for the zip file.")
136
  yield "\n\n".join(log_outputs), "", None, 100
137
  return
138
 
139
- os.makedirs(temp_extract_dir, exist_ok=True)
140
  audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
141
  extracted_count = 0
142
  for file_info in zf.infolist():
143
  if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
144
  try:
145
- # extract returns path relative to extract dir; build absolute path
146
  zf.extract(file_info, path=temp_extract_dir)
147
  extracted_path = os.path.join(temp_extract_dir, file_info.filename)
148
- # Ensure parent dirs exist (zip could contain folders)
149
  extracted_path = os.path.normpath(extracted_path)
150
  if os.path.exists(extracted_path):
151
  extracted_audio_paths.append(extracted_path)
@@ -156,7 +205,6 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
156
 
157
  if extracted_count == 0:
158
  log_outputs.append("No supported audio files found in the zip archive.")
159
- # cleanup empty dir
160
  try:
161
  shutil.rmtree(temp_extract_dir)
162
  log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
@@ -171,7 +219,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
171
  try:
172
  shutil.rmtree(temp_extract_dir)
173
  log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
174
- except OSError as e:
175
  log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
176
  yield "\n\n".join(log_outputs), "", None, 100
177
  return
@@ -181,31 +229,29 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
181
  try:
182
  shutil.rmtree(temp_extract_dir)
183
  log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
184
- except OSError as e:
185
- log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
186
  yield "\n\n".join(log_outputs), "", None, 100
187
  return
188
 
189
- # Build list of input file paths (strings)
190
  all_audio_paths = []
191
- if file_paths:
192
- # file_paths from Gradio with type="filepath" come as list of paths
193
- if isinstance(file_paths, (list, tuple)):
194
- all_audio_paths.extend(file_paths)
195
  else:
196
- all_audio_paths.append(file_paths)
197
 
198
  if extracted_audio_paths:
199
  all_audio_paths.extend(extracted_audio_paths)
200
 
201
  if not all_audio_paths:
202
  log_outputs.append("No audio files provided for transcription.")
203
- # cleanup
204
  if os.path.exists(temp_extract_dir):
205
  try:
206
  shutil.rmtree(temp_extract_dir)
207
  log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
208
- except OSError as e:
209
  log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
210
  yield "\n\n".join(log_outputs), "", None, 100
211
  return
@@ -213,7 +259,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
213
  total_files = len(all_audio_paths)
214
  processed = 0
215
 
216
- # Load model once (cache)
217
  if model_name not in model_cache:
218
  log_outputs.append(f"Loading model: {model_name}")
219
  yield "\n\n".join(log_outputs), "", None, 3
@@ -221,26 +267,24 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
221
  model_cache[model_name] = whisper.load_model(model_name)
222
  except Exception as e:
223
  log_outputs.append(f"Error loading model {model_name}: {e}")
224
- # cleanup
225
  if os.path.exists(temp_extract_dir):
226
  try:
227
  shutil.rmtree(temp_extract_dir)
228
  log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
229
- except OSError as e:
230
- log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
231
  yield "\n\n".join(log_outputs), "", None, 100
232
  return
233
 
234
  model = model_cache[model_name]
235
 
236
- # Process files one by one and yield progress
237
  for idx, path in enumerate(all_audio_paths):
238
  basename = os.path.basename(path)
239
  try:
240
  log_outputs.append(f"Starting processing: {basename}")
241
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
242
 
243
- # If file is .dct or other non-wav, convert
244
  try:
245
  wav_path = convert_to_wav_if_needed(path)
246
  if wav_path != path:
@@ -255,7 +299,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
255
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
256
  continue
257
 
258
- # Transcribe using Whisper model
259
  try:
260
  log_outputs.append(f"Transcribing: {basename}")
261
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(10 + 80 * (processed / total_files))
@@ -263,102 +307,5 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
263
  result = model.transcribe(wav_path)
264
  transcript = result.get("text", "")
265
 
266
- # Save transcript to /tmp
267
  base = os.path.splitext(basename)[0]
268
- save_path = os.path.join('/tmp', f"{base}-transcript.txt")
269
- with open(save_path, 'w', encoding='utf-8') as f:
270
- f.write(transcript)
271
-
272
- log_outputs.append(f"File processed: {basename} -> {save_path}")
273
- transcript_outputs_list.append(f"Transcript for {basename}:\n{transcript}")
274
-
275
- except Exception as e:
276
- log_outputs.append(f"Error processing {basename}: {e}")
277
- transcript_outputs_list.append(f"Could not transcribe {basename} due to an error: {e}")
278
-
279
- finally:
280
- # remove temporary wav if we created one
281
- if wav_path != path and os.path.exists(wav_path):
282
- try:
283
- os.unlink(wav_path)
284
- except Exception:
285
- pass
286
-
287
- processed += 1
288
- percent = int(5 + 90 * (processed / total_files))
289
- yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
290
-
291
- except Exception as e:
292
- log_outputs.append(f"Unexpected error with {basename}: {e}")
293
- transcript_outputs_list.append(f"Unexpected error with {basename}: {e}")
294
- processed += 1
295
- percent = int(5 + 90 * (processed / total_files))
296
- yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
297
-
298
- # After all files processed, possibly save merged Word file
299
- combined_transcript_string = "\n\n---\n\n".join(transcript_outputs_list)
300
-
301
- if merge_checkbox and combined_transcript_string.strip():
302
- try:
303
- word_filename = save_as_word(combined_transcript_string)
304
- log_outputs.append(f"Merged transcript saved to: {word_filename}")
305
- word_file_path = word_filename
306
- except Exception as e:
307
- log_outputs.append(f"Error saving merged transcript to Word file: {e}")
308
-
309
- # cleanup extracted files
310
- if os.path.exists(temp_extract_dir):
311
- try:
312
- shutil.rmtree(temp_extract_dir)
313
- log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
314
- except OSError as e:
315
- log_outputs.append(f"Warning: Could not clean up temporary temporary directory {temp_extract_dir}: {e}")
316
-
317
- # final yield at 100%
318
- yield "\n\n".join(log_outputs), combined_transcript_string, word_file_path, 100
319
-
320
-
321
# Gradio UI: layout first, then event wiring, then launch.
_whisper_model_names = [
    "tiny", "tiny.en", "base", "base.en", "small", "small.en",
    "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3",
]

with gr.Blocks() as demo:
    gr.Markdown("## Whisper Transcription Tool (Multiple Files) — .dct support + progress")

    # Row 1: model choice and the two option toggles.
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=_whisper_model_names,
            value="base",
            label="Select Whisper Model",
        )
        advanced_checkbox = gr.Checkbox(label="Enable Advanced Options")
        merge_checkbox = gr.Checkbox(
            label="Merge Transcripts into Single File",
            value=False,
        )

    # Row 2: optional zip archive and its password.
    with gr.Row():
        zip_input = gr.File(
            file_count="single",
            type="filepath",
            label="Upload Zip File (Optional)",
        )
        zip_password_input = gr.Textbox(
            label="Zip File Password (Optional)",
            type="password",
        )

    audio_input = gr.File(
        file_count="multiple",
        type="filepath",
        label="Upload Audio Files (Optional)",
    )

    transcribe_btn = gr.Button("Start Transcription")

    # Output widgets, in the same order as the tuple yielded by the handler.
    log_output = gr.Textbox(label="Log Output", lines=10)
    transcript_output = gr.Textbox(label="Transcripts", lines=20)
    word_file_output = gr.File(label="Download Merged Transcript (.docx)")
    progress_num = gr.Number(value=0, label="Progress (%)")

    def update_file_visibility(merge_checked):
        """Show the .docx download widget only while merging is enabled."""
        return gr.update(visible=merge_checked)

    merge_checkbox.change(
        update_file_visibility,
        inputs=[merge_checkbox],
        outputs=[word_file_output],
        api_name="update_file_visibility",
    )

    _transcribe_inputs = [
        audio_input,
        model_dropdown,
        advanced_checkbox,
        merge_checkbox,
        zip_input,
        zip_password_input,
    ]
    _transcribe_outputs = [log_output, transcript_output, word_file_output, progress_num]
    transcribe_btn.click(
        transcribe_multiple,
        inputs=_transcribe_inputs,
        outputs=_transcribe_outputs,
    )


demo.launch()
 
1
+ # app.py
2
  # Whisper Transcription Tool with .dct support and progress updates
3
+ # Drop-in for Hugging Face Spaces (requires ffmpeg in environment)
4
 
5
  from docx import Document
6
  import os
7
  import whisper
8
  import gradio as gr
9
  import pyzipper
 
10
  import shutil
11
  import tempfile
12
+ import subprocess
13
  from pydub import AudioSegment
14
 
15
+ # Model cache to avoid reloading
16
  model_cache = {}
17
 
18
def save_as_word(text, filename=None):
    """Store ``text`` in a new .docx file and return the file's path.

    Args:
        text: Content placed into a single paragraph of the document.
        filename: Destination path. When ``None``, the file is written as
            ``merged_transcripts.docx`` inside the system temp directory.

    Returns:
        The path the document was saved to.
    """
    target = filename
    if target is None:
        target = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")

    doc = Document()
    doc.add_paragraph(text)
    doc.save(target)
    return target
26
 
27
def _remove_quietly(path):
    """Delete ``path`` if possible; failure to delete is deliberately ignored."""
    try:
        os.unlink(path)
    except OSError:
        pass


def _write_conversion_diagnostics(auto_err, heading, lines):
    """Write a diagnostics report into a fresh temp dir and return the report path."""
    diag_dir = tempfile.mkdtemp(prefix='dct_diag_')
    log_path = os.path.join(diag_dir, 'conversion_diagnostics.txt')
    with open(log_path, 'w', encoding='utf-8') as df:
        df.write("pydub auto-error:\n")
        df.write(auto_err + "\n\n")
        df.write(heading)
        df.write("\n".join(lines))
    return log_path


def convert_to_wav_if_needed(input_path, diagnostics_keep=False):
    """
    Return the path of a WAV rendition of ``input_path``.

    A path ending in ``.wav`` (case-insensitive) is returned unchanged.
    Otherwise pydub's ``AudioSegment.from_file`` is tried first. If that
    raises, the ``ffmpeg`` executable is invoked once per combination of input
    format, sample rate and channel count until an attempt exits with status 0
    and leaves an output file larger than 200 bytes.

    Args:
        input_path: Path of the audio file to convert.
        diagnostics_keep: When true, a diagnostics report is also written
            after a *successful* ffmpeg fallback. On total failure a report
            is always written, regardless of this flag.

    Returns:
        ``input_path`` for WAV inputs, otherwise the path of a newly created
        temporary ``.wav`` file that the caller is responsible for deleting.

    Raises:
        Exception: when no attempt succeeded. The message contains the path
            of the diagnostics report and the first 12 diagnostics lines, or
            states that the report itself could not be written.
    """
    if input_path.lower().endswith('.wav'):
        return input_path

    # 1) Try pydub/AudioSegment auto-detection.
    auto_err = ""
    auto_path = None  # stays None if the temp file itself could not be created
    try:
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        tmp.close()
        auto_path = tmp.name
        AudioSegment.from_file(input_path).export(auto_path, format='wav')
        return auto_path
    except Exception as e_auto:
        auto_err = str(e_auto)
        if auto_path is not None:
            _remove_quietly(auto_path)

    # 2) Exhaustive ffmpeg guess grid.
    pcm_formats = ['s16le', 's32le', 's24le', 's8', 'u8', 's16be', 'pcm_s16le', 'pcm_u8', 'pcm_u16le']
    mulaw_alaw = ['mulaw', 'alaw']
    adpcm = ['adpcm_ima_wav', 'adpcm_ms']
    other = ['gsm', 'g726', 'vorbis']  # extras; may fail but harmless
    formats = pcm_formats + mulaw_alaw + adpcm + other

    sample_rates = [8000, 11025, 12000, 16000, 22050, 32000, 44100, 48000]
    channels = [1, 2]

    # Same visiting order as three nested loops: format, then rate, then channels.
    guesses = [(fmt, sr, ch) for fmt in formats for sr in sample_rates for ch in channels]

    diagnostics = []

    for fmt, sr, ch in guesses:
        out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        out_wav.close()
        out_path = out_wav.name
        cmd = [
            'ffmpeg', '-hide_banner', '-loglevel', 'error', '-y',
            '-f', fmt, '-ar', str(sr), '-ac', str(ch),
            '-i', input_path, out_path
        ]
        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
        except FileNotFoundError as e_run:
            # The ffmpeg binary could not be started, so every remaining
            # guess would fail the same way; record it once and stop.
            diagnostics.append(f"RUN-FAIL fmt={fmt} sr={sr} ch={ch} err={e_run}")
            _remove_quietly(out_path)
            break
        except Exception as e_run:
            diagnostics.append(f"RUN-FAIL fmt={fmt} sr={sr} ch={ch} err={e_run}")
            _remove_quietly(out_path)
            continue

        rc = proc.returncode
        stderr = proc.stderr.strip() if proc.stderr else ""
        stdout = proc.stdout.strip() if proc.stdout else ""
        diagnostics.append(f"ATTEMPT fmt={fmt} sr={sr} ch={ch} rc={rc}")
        if stdout:
            diagnostics.append("STDOUT:")
            diagnostics.append(stdout)
        if stderr:
            diagnostics.append("STDERR:")
            diagnostics.append(stderr)
        diagnostics.append("-" * 60)

        # Success heuristic: exit 0 + output file present and > 200 bytes.
        # NOTE(review): this only shows ffmpeg produced output, not that the
        # guessed encoding is the right one -- confirm against real samples.
        try:
            converted = rc == 0 and os.path.getsize(out_path) > 200
        except OSError:
            converted = False

        if converted:
            if diagnostics_keep:
                heading = (
                    "Successful ffmpeg guess:\n"
                    f"fmt={fmt} sr={sr} ch={ch}\n\n"
                    "Recent diagnostics (truncated):\n"
                )
                try:
                    _write_conversion_diagnostics(auto_err, heading, diagnostics[-1000:])
                except OSError:
                    # The report is optional; never throw away a good
                    # conversion because the report could not be written.
                    pass
            return out_path

        _remove_quietly(out_path)

    # Try ffprobe if available for more info.
    try:
        fp = subprocess.run(['ffprobe', '-v', 'error', '-show_format', '-show_streams', input_path],
                            capture_output=True, text=True, timeout=15)
        diagnostics.append("FFPROBE OUTPUT:")
        diagnostics.append(fp.stdout.strip() or fp.stderr.strip())
    except Exception as e:
        diagnostics.append(f"ffprobe not available or failed: {e}")

    # Hex preview of the first bytes, to help identify the format by hand.
    try:
        with open(input_path, 'rb') as f:
            head = f.read(256)
        diagnostics.append("HEX PREVIEW (first 256 bytes):")
        diagnostics.append(head.hex())
    except Exception as e:
        diagnostics.append(f"Could not read file head: {e}")

    # Write the full report; the raised message points at it.
    try:
        diag_log_path = _write_conversion_diagnostics(
            auto_err, "Full diagnostics from ffmpeg attempts:\n\n", diagnostics
        )
    except Exception as e:
        raise Exception(f"Conversion failed and diagnostics could not be written: {e}") from e

    raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log_path}\nFirst diagnostics lines:\n" + "\n".join(diagnostics[:12]))
147
+
148
+ def transcribe_multiple(audio_files, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
149
  """
150
+ Generator for Gradio to yield live progress.
151
+ Inputs:
152
+ audio_files: list or single filepath(s) (type='filepath' in Gradio)
153
+ model_name: whisper model name string
154
+ merge_checkbox: boolean to merge into docx
155
+ zip_file: optional path to zip file (type='filepath')
156
+ zip_password: optional password
157
+ Yields: (log_text, transcripts_text, word_file_path_or_None, percent_int)
158
  """
 
159
  log_outputs = []
160
  transcript_outputs_list = []
161
  word_file_path = None
162
  extracted_audio_paths = []
163
+ temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
164
 
165
+ # initial yield so UI shows immediately
166
  yield "", "", None, 0
167
 
168
+ # cleanup old extract dir
169
  if os.path.exists(temp_extract_dir):
170
  try:
171
  shutil.rmtree(temp_extract_dir)
172
  log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
173
+ except Exception as e:
174
  log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
175
 
176
+ # Handle zip file (zip_file may be a path string)
177
  if zip_file:
178
  log_outputs.append(f"Processing zip file: {zip_file}")
179
  yield "\n\n".join(log_outputs), "", None, 2
180
  try:
181
+ os.makedirs(temp_extract_dir, exist_ok=True)
182
  with pyzipper.ZipFile(zip_file, 'r') as zf:
183
  if zip_password:
184
  try:
185
  zf.setpassword(zip_password.encode())
186
+ except Exception:
187
  log_outputs.append("Error: Incorrect password for the zip file.")
188
  yield "\n\n".join(log_outputs), "", None, 100
189
  return
190
 
 
191
  audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
192
  extracted_count = 0
193
  for file_info in zf.infolist():
194
  if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
195
  try:
 
196
  zf.extract(file_info, path=temp_extract_dir)
197
  extracted_path = os.path.join(temp_extract_dir, file_info.filename)
 
198
  extracted_path = os.path.normpath(extracted_path)
199
  if os.path.exists(extracted_path):
200
  extracted_audio_paths.append(extracted_path)
 
205
 
206
  if extracted_count == 0:
207
  log_outputs.append("No supported audio files found in the zip archive.")
 
208
  try:
209
  shutil.rmtree(temp_extract_dir)
210
  log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
 
219
  try:
220
  shutil.rmtree(temp_extract_dir)
221
  log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
222
+ except Exception as e:
223
  log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
224
  yield "\n\n".join(log_outputs), "", None, 100
225
  return
 
229
  try:
230
  shutil.rmtree(temp_extract_dir)
231
  log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
232
+ except Exception as e2:
233
+ log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e2}")
234
  yield "\n\n".join(log_outputs), "", None, 100
235
  return
236
 
237
+ # Build list of audio file paths
238
  all_audio_paths = []
239
+ if audio_files:
240
+ if isinstance(audio_files, (list, tuple)):
241
+ all_audio_paths.extend(audio_files)
 
242
  else:
243
+ all_audio_paths.append(audio_files)
244
 
245
  if extracted_audio_paths:
246
  all_audio_paths.extend(extracted_audio_paths)
247
 
248
  if not all_audio_paths:
249
  log_outputs.append("No audio files provided for transcription.")
 
250
  if os.path.exists(temp_extract_dir):
251
  try:
252
  shutil.rmtree(temp_extract_dir)
253
  log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
254
+ except Exception as e:
255
  log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
256
  yield "\n\n".join(log_outputs), "", None, 100
257
  return
 
259
  total_files = len(all_audio_paths)
260
  processed = 0
261
 
262
+ # Load whisper model once
263
  if model_name not in model_cache:
264
  log_outputs.append(f"Loading model: {model_name}")
265
  yield "\n\n".join(log_outputs), "", None, 3
 
267
  model_cache[model_name] = whisper.load_model(model_name)
268
  except Exception as e:
269
  log_outputs.append(f"Error loading model {model_name}: {e}")
 
270
  if os.path.exists(temp_extract_dir):
271
  try:
272
  shutil.rmtree(temp_extract_dir)
273
  log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
274
+ except Exception:
275
+ pass
276
  yield "\n\n".join(log_outputs), "", None, 100
277
  return
278
 
279
  model = model_cache[model_name]
280
 
 
281
  for idx, path in enumerate(all_audio_paths):
282
  basename = os.path.basename(path)
283
  try:
284
  log_outputs.append(f"Starting processing: {basename}")
285
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
286
 
287
+ # Convert to WAV if needed
288
  try:
289
  wav_path = convert_to_wav_if_needed(path)
290
  if wav_path != path:
 
299
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
300
  continue
301
 
302
+ # Transcribe with Whisper
303
  try:
304
  log_outputs.append(f"Transcribing: {basename}")
305
  yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(10 + 80 * (processed / total_files))
 
307
  result = model.transcribe(wav_path)
308
  transcript = result.get("text", "")
309
 
 
310
  base = os.path.splitext(basename)[0]
311
+ save_path = os.path.join(tempfile.gett_