staraks committed on
Commit
2171026
·
verified ·
1 Parent(s): 1910810

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -123
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py
2
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
3
- # Cleaned, debugged, and Spaces-ready.
4
 
5
  import os
6
  import sys
@@ -32,12 +32,12 @@ except Exception as e:
32
 
33
  print("DEBUG: imports OK", flush=True)
34
 
35
-
36
-
37
  # ---------- Config ----------
38
  MEMORY_FILE = "memory.json"
39
  MEMORY_LOCK = threading.Lock()
40
- MIN_WAV_SIZE = 200 # bytes
 
 
41
  FFMPEG_CANDIDATES = [
42
  ("s16le", 16000, 1),
43
  ("s16le", 44100, 2),
@@ -63,15 +63,19 @@ def load_memory():
63
  pass
64
  return mem
65
 
 
66
  def save_memory(mem):
67
  with MEMORY_LOCK:
68
  with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
69
  json.dump(mem, fh, ensure_ascii=False, indent=2)
70
 
71
- memory = load_memory()
72
- print("DEBUG: memory loaded (words=%d phrases=%d)" % (len(memory.get("words", {})), len(memory.get("phrases", {}))), flush=True)
73
-
74
 
 
 
 
 
 
 
75
 
76
  # ---------- Postprocessing ----------
77
  MEDICAL_ABBREVIATIONS = {
@@ -94,55 +98,54 @@ DRUG_NORMALIZATION = {
94
  "amoxicillin": "Amoxicillin",
95
  }
96
 
 
97
  def expand_abbreviations(text):
98
- tokens = re.split(r'(\s+)', text)
99
  out = []
100
  for t in tokens:
101
  key = t.lower().strip(".,;:")
102
  if key in MEDICAL_ABBREVIATIONS:
103
- trailing = ''
104
- m = re.match(r'([A-Za-z0-9/]+)([.,;:]*)', t)
105
  if m:
106
- trailing = m.group(2) or ''
107
  out.append(MEDICAL_ABBREVIATIONS[key] + trailing)
108
  else:
109
  out.append(t)
110
- return ''.join(out)
111
-
112
-
113
-
114
-
115
 
116
 
117
  def normalize_drugs(text):
118
  for k, v in DRUG_NORMALIZATION.items():
119
- text = re.sub(rf'\b{k}\b', v, text, flags=re.IGNORECASE)
120
  return text
121
 
 
122
  def punctuation_and_capitalization(text):
123
  text = text.strip()
124
  if not text:
125
  return text
126
- if not re.search(r'[.?!]\s*$', text):
127
- text = text.rstrip() + '.'
128
- parts = re.split(r'([.?!]\s+)', text)
129
  out = []
130
  for p in parts:
131
- if p and not re.match(r'[.?!]\s+', p):
132
  out.append(p.capitalize())
133
  else:
134
  out.append(p)
135
- return ''.join(out)
 
136
 
137
  def postprocess_transcript(text, format_soap=False):
138
  if not text:
139
  return text
140
- t = re.sub(r'\s+', ' ', text).strip()
141
  t = expand_abbreviations(t)
142
  t = normalize_drugs(t)
143
  t = punctuation_and_capitalization(t)
144
  if format_soap:
145
- sentences = re.split(r'(?<=[.?!])\s+', t)
146
  subj = sentences[0] if len(sentences) >= 1 else ""
147
  obj = sentences[1] if len(sentences) >= 2 else ""
148
  assessment = ""
@@ -150,22 +153,23 @@ def postprocess_transcript(text, format_soap=False):
150
  if kw in t.lower():
151
  assessment = "Assessment: " + subj
152
  break
153
- soap = f"S: {subj}\nO: {obj}\nA: {assessment}\nP: Plan: follow up as indicated."
 
 
154
  return soap
155
  return t
156
 
157
 
158
-
159
-
160
-
161
-
162
  # ---------- Memory utilities ----------
163
  def extract_words_and_phrases(text):
164
  # basic tokenization for words; phrases = sentences
165
  words = re.findall(r"[A-Za-z0-9\-']+", text)
166
- sentences = [s.strip() for s in re.split(r'(?<=[.?!])\s+', text) if s.strip()]
 
 
167
  return [w for w in words if w.strip()], sentences
168
 
 
169
  def update_memory_with_transcript(transcript):
170
  global memory
171
  words, sentences = extract_words_and_phrases(transcript)
@@ -193,9 +197,6 @@ def update_memory_with_transcript(transcript):
193
  pass
194
 
195
 
196
-
197
-
198
-
199
  def memory_correct_text(text, min_ratio=0.85):
200
  if not text or (not memory.get("words") and not memory.get("phrases")):
201
  return text
@@ -204,7 +205,9 @@ def memory_correct_text(text, min_ratio=0.85):
204
  lw = w.lower()
205
  if lw in memory["words"]:
206
  return w
207
- candidates = get_close_matches(lw, memory["words"].keys(), n=1, cutoff=min_ratio)
 
 
208
  if candidates:
209
  cand = candidates[0]
210
  if w and w[0].isupper():
@@ -212,46 +215,63 @@ def memory_correct_text(text, min_ratio=0.85):
212
  return cand
213
  return w
214
 
215
- tokens = re.split(r'(\W+)', text)
216
  corrected_tokens = []
217
  for tok in tokens:
218
  if re.match(r"^[A-Za-z0-9\-']+$", tok):
219
  corrected_tokens.append(fix_word(tok))
220
  else:
221
  corrected_tokens.append(tok)
222
- corrected = ''.join(corrected_tokens)
223
 
224
  for phrase in sorted(memory.get("phrases", {}).keys(), key=lambda s: -len(s)):
225
  low_phrase = phrase.lower()
226
  if len(low_phrase) < 8:
227
  continue
228
  if low_phrase in corrected.lower():
229
- corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
 
 
230
  return corrected
231
 
 
232
  # ---------- File utilities ----------
233
  def save_as_word(text, filename=None):
234
  if filename is None:
235
- filename = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
 
 
236
  doc = Document()
237
  doc.add_paragraph(text)
238
  doc.save(filename)
239
  return filename
240
 
241
 
242
-
243
-
244
-
245
-
246
  # ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
247
  def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
248
  cmd = [
249
- "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
250
- "-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path
 
 
 
 
 
 
 
 
 
 
 
 
251
  ]
252
  try:
253
  proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
254
- if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
 
 
 
 
255
  return True, proc.stderr + proc.stdout
256
  else:
257
  try:
@@ -268,6 +288,7 @@ def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
268
  pass
269
  return False, str(e)
270
 
 
271
  def convert_to_wav_if_needed(input_path):
272
  input_path = str(input_path)
273
  lower = input_path.lower()
@@ -295,10 +316,7 @@ def convert_to_wav_if_needed(input_path):
295
  except Exception:
296
  pass
297
 
298
-
299
-
300
-
301
-
302
  diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
303
  diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
304
  diagnostics = []
@@ -306,7 +324,9 @@ def convert_to_wav_if_needed(input_path):
306
  out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
307
  out_wav.close()
308
  success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
309
- diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
 
 
310
  if success:
311
  try:
312
  with open(diag_log, "w", encoding="utf-8") as fh:
@@ -326,9 +346,14 @@ def convert_to_wav_if_needed(input_path):
326
  except Exception:
327
  pass
328
 
 
329
  try:
330
- fp = subprocess.run(["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
331
- capture_output=True, text=True, timeout=10)
 
 
 
 
332
  diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
333
  except Exception as e:
334
  diagnostics.append("ffprobe failed: " + str(e))
@@ -348,19 +373,35 @@ def convert_to_wav_if_needed(input_path):
348
  except Exception as e:
349
  raise Exception(f"Conversion failed; diagnostics write error: {e}")
350
 
351
- raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
 
 
 
352
 
353
  # ---------- Whisper model cache ----------
354
  MODEL_CACHE = {}
355
 
 
356
  def get_whisper_model(name):
357
  if name not in MODEL_CACHE:
358
  print(f"DEBUG: loading whisper model '{name}'", flush=True)
359
  MODEL_CACHE[name] = whisper.load_model(name)
360
  return MODEL_CACHE[name]
361
 
 
362
  # ---------- Main transcription generator ----------
363
- def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbox, zip_file=None, zip_password=None, enable_memory=False):
 
 
 
 
 
 
 
 
 
 
 
364
  log = []
365
  transcripts = []
366
  word_file_path = None
@@ -370,7 +411,7 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
370
  # initial yield
371
  yield "", "", None, 0
372
 
373
- # cleanup previous
374
  if os.path.exists(temp_extract_dir):
375
  try:
376
  shutil.rmtree(temp_extract_dir)
@@ -392,7 +433,16 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
392
  log.append("Incorrect zip password")
393
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
394
  return
395
- exts = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a', '.dat', '.dct']
 
 
 
 
 
 
 
 
 
396
  count = 0
397
  for info in zf.infolist():
398
  if info.is_dir():
@@ -404,7 +454,9 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
404
  except Exception as e:
405
  log.append(f"Error extracting {info.filename}: {e}")
406
  continue
407
- p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
 
 
408
  if os.path.exists(p):
409
  extracted_audio_paths.append(p)
410
  count += 1
@@ -451,7 +503,7 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
451
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
452
  return
453
 
454
- # load model (on demand)
455
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
456
  try:
457
  model = get_whisper_model(model_name)
@@ -466,7 +518,9 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
466
  for p in paths:
467
  idx += 1
468
  log.append(f"Processing file ({idx}/{total}): {p}")
469
- yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx-1) * 80 / max(1, total))
 
 
470
 
471
  wav = None
472
  try:
@@ -474,46 +528,165 @@ def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbo
474
  log.append(f"Converted to WAV: {wav}")
475
  except Exception as e:
476
  log.append(f"Conversion failed for {p}: {e}")
477
- transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}")
478
- yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
 
 
 
 
479
  continue
480
 
481
- # continue... (UI + launch in next message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
 
 
 
 
 
 
 
 
 
483
 
 
 
484
 
 
 
 
 
 
 
 
485
 
486
 
487
- # Defensive wrapper to surface exceptions into the Logs textbox
488
- def run_transcription_wrapper(files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options_state):
489
- import traceback, io
490
- try:
491
- audio_input = files
492
- zip_path = None
493
- if zip_file:
494
- if isinstance(zip_file, (str, os.PathLike)):
495
- zip_path = str(zip_file)
496
- elif hasattr(zip_file, "name"):
497
- zip_path = zip_file.name
498
- elif isinstance(zip_file, dict) and zip_file.get("name"):
499
- zip_path = zip_file["name"]
500
- adv = {}
501
- # return the generator directly (transcribe_multiple yields tuples)
502
- return transcribe_multiple(audio_input, model_name, adv, merge_checkbox=merge, zip_file=zip_path, zip_password=zip_password, enable_memory=enable_memory)
503
- except Exception as e:
504
- # If anything raises before generator returned, produce a generator that yields the traceback
505
- buf = io.StringIO()
506
- traceback.print_exc(file=buf)
507
- tb = buf.getvalue()
508
- logs = f"EXCEPTION in run_transcription_wrapper:\n{tb}"
509
- transcripts = "ERROR: transcription did not start due to exception."
510
- # Yield once with logs and final 100% to stop spinner
511
- def error_gen():
512
- yield logs, transcripts, None, 100
513
- return error_gen()
514
-
515
-
516
- # Launch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  if __name__ == "__main__":
518
  port = int(os.environ.get("PORT", 7860))
519
  print("DEBUG: launching Gradio on port", port, flush=True)
@@ -523,34 +696,3 @@ if __name__ == "__main__":
523
  print("FATAL: demo.launch failed:", e, flush=True)
524
  traceback.print_exc()
525
  raise
526
-
527
-
528
-
529
-
530
- # Safe launch: only launch if demo exists
531
- if __name__ == "__main__":
532
- port = int(os.environ.get("PORT", 7860))
533
- print("DEBUG: preparing to launch Gradio on port", port, flush=True)
534
- try:
535
- if 'demo' in globals() and demo is not None:
536
- print("DEBUG: demo object found. launching...", flush=True)
537
- demo.queue().launch(server_name="0.0.0.0", server_port=port)
538
- else:
539
- print("FATAL: 'demo' not found. The Gradio UI block may be missing or failed to create.", flush=True)
540
- # show the tail of the file so you can inspect quickly in logs
541
- try:
542
- import inspect
543
- import pathlib
544
- print("DEBUG: last 60 lines of /app/app.py for inspection:", flush=True)
545
- with open("/app/app.py", "r", encoding="utf-8") as fh:
546
- all_lines = fh.read().splitlines()
547
- for ln in all_lines[-60:]:
548
- print(ln)
549
- except Exception:
550
- pass
551
- # Exit non-zero so platform reports failure clearly
552
- sys.exit(1)
553
- except Exception as e:
554
- print("FATAL: demo.launch failed:", e, flush=True)
555
- traceback.print_exc()
556
- raise
 
1
  # app.py
2
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
3
+ # Clean, single-version file for Hugging Face Spaces.
4
 
5
  import os
6
  import sys
 
32
 
33
  print("DEBUG: imports OK", flush=True)
34
 
 
 
35
  # ---------- Config ----------
36
  MEMORY_FILE = "memory.json"
37
  MEMORY_LOCK = threading.Lock()
38
+ MIN_WAV_SIZE = 200 # bytes
39
+
40
+ # Small ffmpeg fallback grid (hybrid conversion)
41
  FFMPEG_CANDIDATES = [
42
  ("s16le", 16000, 1),
43
  ("s16le", 44100, 2),
 
63
  pass
64
  return mem
65
 
66
+
67
  def save_memory(mem):
68
  with MEMORY_LOCK:
69
  with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
70
  json.dump(mem, fh, ensure_ascii=False, indent=2)
71
 
 
 
 
72
 
73
+ memory = load_memory()
74
+ print(
75
+ "DEBUG: memory loaded (words=%d phrases=%d)"
76
+ % (len(memory.get("words", {})), len(memory.get("phrases", {}))),
77
+ flush=True,
78
+ )
79
 
80
  # ---------- Postprocessing ----------
81
  MEDICAL_ABBREVIATIONS = {
 
98
  "amoxicillin": "Amoxicillin",
99
  }
100
 
101
+
102
  def expand_abbreviations(text):
103
+ tokens = re.split(r"(\s+)", text)
104
  out = []
105
  for t in tokens:
106
  key = t.lower().strip(".,;:")
107
  if key in MEDICAL_ABBREVIATIONS:
108
+ trailing = ""
109
+ m = re.match(r"([A-Za-z0-9/]+)([.,;:]*)", t)
110
  if m:
111
+ trailing = m.group(2) or ""
112
  out.append(MEDICAL_ABBREVIATIONS[key] + trailing)
113
  else:
114
  out.append(t)
115
+ return "".join(out)
 
 
 
 
116
 
117
 
118
  def normalize_drugs(text):
119
  for k, v in DRUG_NORMALIZATION.items():
120
+ text = re.sub(rf"\b{k}\b", v, text, flags=re.IGNORECASE)
121
  return text
122
 
123
+
124
  def punctuation_and_capitalization(text):
125
  text = text.strip()
126
  if not text:
127
  return text
128
+ if not re.search(r"[.?!]\s*$", text):
129
+ text = text.rstrip() + "."
130
+ parts = re.split(r"([.?!]\s+)", text)
131
  out = []
132
  for p in parts:
133
+ if p and not re.match(r"[.?!]\s+", p):
134
  out.append(p.capitalize())
135
  else:
136
  out.append(p)
137
+ return "".join(out)
138
+
139
 
140
  def postprocess_transcript(text, format_soap=False):
141
  if not text:
142
  return text
143
+ t = re.sub(r"\s+", " ", text).strip()
144
  t = expand_abbreviations(t)
145
  t = normalize_drugs(t)
146
  t = punctuation_and_capitalization(t)
147
  if format_soap:
148
+ sentences = re.split(r"(?<=[.?!])\s+", t)
149
  subj = sentences[0] if len(sentences) >= 1 else ""
150
  obj = sentences[1] if len(sentences) >= 2 else ""
151
  assessment = ""
 
153
  if kw in t.lower():
154
  assessment = "Assessment: " + subj
155
  break
156
+ soap = (
157
+ f"S: {subj}\nO: {obj}\nA: {assessment}\nP: Plan: follow up as indicated."
158
+ )
159
  return soap
160
  return t
161
 
162
 
 
 
 
 
163
  # ---------- Memory utilities ----------
164
  def extract_words_and_phrases(text):
165
  # basic tokenization for words; phrases = sentences
166
  words = re.findall(r"[A-Za-z0-9\-']+", text)
167
+ sentences = [
168
+ s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()
169
+ ]
170
  return [w for w in words if w.strip()], sentences
171
 
172
+
173
  def update_memory_with_transcript(transcript):
174
  global memory
175
  words, sentences = extract_words_and_phrases(transcript)
 
197
  pass
198
 
199
 
 
 
 
200
  def memory_correct_text(text, min_ratio=0.85):
201
  if not text or (not memory.get("words") and not memory.get("phrases")):
202
  return text
 
205
  lw = w.lower()
206
  if lw in memory["words"]:
207
  return w
208
+ candidates = get_close_matches(
209
+ lw, memory["words"].keys(), n=1, cutoff=min_ratio
210
+ )
211
  if candidates:
212
  cand = candidates[0]
213
  if w and w[0].isupper():
 
215
  return cand
216
  return w
217
 
218
+ tokens = re.split(r"(\W+)", text)
219
  corrected_tokens = []
220
  for tok in tokens:
221
  if re.match(r"^[A-Za-z0-9\-']+$", tok):
222
  corrected_tokens.append(fix_word(tok))
223
  else:
224
  corrected_tokens.append(tok)
225
+ corrected = "".join(corrected_tokens)
226
 
227
  for phrase in sorted(memory.get("phrases", {}).keys(), key=lambda s: -len(s)):
228
  low_phrase = phrase.lower()
229
  if len(low_phrase) < 8:
230
  continue
231
  if low_phrase in corrected.lower():
232
+ corrected = re.sub(
233
+ re.escape(phrase), phrase, corrected, flags=re.IGNORECASE
234
+ )
235
  return corrected
236
 
237
+
238
  # ---------- File utilities ----------
239
  def save_as_word(text, filename=None):
240
  if filename is None:
241
+ filename = os.path.join(
242
+ tempfile.gettempdir(), "merged_transcripts.docx"
243
+ )
244
  doc = Document()
245
  doc.add_paragraph(text)
246
  doc.save(filename)
247
  return filename
248
 
249
 
 
 
 
 
250
  # ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
251
  def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
252
  cmd = [
253
+ "ffmpeg",
254
+ "-hide_banner",
255
+ "-loglevel",
256
+ "error",
257
+ "-y",
258
+ "-f",
259
+ fmt,
260
+ "-ar",
261
+ str(sr),
262
+ "-ac",
263
+ str(ch),
264
+ "-i",
265
+ input_path,
266
+ out_path,
267
  ]
268
  try:
269
  proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
270
+ if (
271
+ proc.returncode == 0
272
+ and os.path.exists(out_path)
273
+ and os.path.getsize(out_path) > MIN_WAV_SIZE
274
+ ):
275
  return True, proc.stderr + proc.stdout
276
  else:
277
  try:
 
288
  pass
289
  return False, str(e)
290
 
291
+
292
  def convert_to_wav_if_needed(input_path):
293
  input_path = str(input_path)
294
  lower = input_path.lower()
 
316
  except Exception:
317
  pass
318
 
319
+ # ffmpeg fallback
 
 
 
320
  diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
321
  diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
322
  diagnostics = []
 
324
  out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
325
  out_wav.close()
326
  success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
327
+ diagnostics.append(
328
+ f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n"
329
+ )
330
  if success:
331
  try:
332
  with open(diag_log, "w", encoding="utf-8") as fh:
 
346
  except Exception:
347
  pass
348
 
349
+ # final diagnostics
350
  try:
351
+ fp = subprocess.run(
352
+ ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
353
+ capture_output=True,
354
+ text=True,
355
+ timeout=10,
356
+ )
357
  diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
358
  except Exception as e:
359
  diagnostics.append("ffprobe failed: " + str(e))
 
373
  except Exception as e:
374
  raise Exception(f"Conversion failed; diagnostics write error: {e}")
375
 
376
+ raise Exception(
377
+ f"Could not convert file to WAV. Diagnostics saved to: {diag_log}"
378
+ )
379
+
380
 
381
  # ---------- Whisper model cache ----------
382
  MODEL_CACHE = {}
383
 
384
+
385
  def get_whisper_model(name):
386
  if name not in MODEL_CACHE:
387
  print(f"DEBUG: loading whisper model '{name}'", flush=True)
388
  MODEL_CACHE[name] = whisper.load_model(name)
389
  return MODEL_CACHE[name]
390
 
391
+
392
  # ---------- Main transcription generator ----------
393
+ def transcribe_multiple(
394
+ audio_files,
395
+ model_name,
396
+ advanced_options,
397
+ merge_checkbox,
398
+ zip_file=None,
399
+ zip_password=None,
400
+ enable_memory=False,
401
+ ):
402
+ """
403
+ Generator yields (log_text, transcripts_text, merged_file_path_or_None, percent_int)
404
+ """
405
  log = []
406
  transcripts = []
407
  word_file_path = None
 
411
  # initial yield
412
  yield "", "", None, 0
413
 
414
+ # cleanup previous temp dir
415
  if os.path.exists(temp_extract_dir):
416
  try:
417
  shutil.rmtree(temp_extract_dir)
 
433
  log.append("Incorrect zip password")
434
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
435
  return
436
+ exts = [
437
+ ".mp3",
438
+ ".wav",
439
+ ".aac",
440
+ ".flac",
441
+ ".ogg",
442
+ ".m4a",
443
+ ".dat",
444
+ ".dct",
445
+ ]
446
  count = 0
447
  for info in zf.infolist():
448
  if info.is_dir():
 
454
  except Exception as e:
455
  log.append(f"Error extracting {info.filename}: {e}")
456
  continue
457
+ p = os.path.normpath(
458
+ os.path.join(temp_extract_dir, info.filename)
459
+ )
460
  if os.path.exists(p):
461
  extracted_audio_paths.append(p)
462
  count += 1
 
503
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
504
  return
505
 
506
+ # load model
507
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
508
  try:
509
  model = get_whisper_model(model_name)
 
518
  for p in paths:
519
  idx += 1
520
  log.append(f"Processing file ({idx}/{total}): {p}")
521
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
522
+ 5 + (idx - 1) * 80 / max(1, total)
523
+ )
524
 
525
  wav = None
526
  try:
 
528
  log.append(f"Converted to WAV: {wav}")
529
  except Exception as e:
530
  log.append(f"Conversion failed for {p}: {e}")
531
+ transcripts.append(
532
+ f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}"
533
+ )
534
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
535
+ 5 + idx * 80 / max(1, total)
536
+ )
537
  continue
538
 
539
+ try:
540
+ whisper_opts = {}
541
+ if isinstance(advanced_options, dict):
542
+ whisper_opts.update(advanced_options)
543
+
544
+ result = model.transcribe(wav, **whisper_opts)
545
+ text = result.get("text", "").strip()
546
+ log.append(f"Transcribed: {len(text)} chars")
547
+
548
+ if enable_memory:
549
+ text = memory_correct_text(text)
550
+ text = postprocess_transcript(text)
551
+ transcripts.append(
552
+ f"FILE: {os.path.basename(p)}\n{text}\n"
553
+ )
554
+
555
+ if enable_memory:
556
+ try:
557
+ update_memory_with_transcript(text)
558
+ log.append("Memory updated.")
559
+ except Exception:
560
+ pass
561
+
562
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
563
+ 10 + idx * 85 / max(1, total)
564
+ )
565
+ except Exception as e:
566
+ log.append(f"Transcription failed for {p}: {e}")
567
+ transcripts.append(
568
+ f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}"
569
+ )
570
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
571
+ 10 + idx * 85 / max(1, total)
572
+ )
573
+ continue
574
+ finally:
575
+ try:
576
+ if wav and os.path.exists(wav):
577
+ tmpdir = tempfile.gettempdir()
578
+ if (
579
+ os.path.commonpath([tmpdir, os.path.abspath(wav)])
580
+ == tmpdir
581
+ and not p.lower().endswith(".wav")
582
+ ):
583
+ os.unlink(wav)
584
+ except Exception:
585
+ pass
586
 
587
+ # final merge option
588
+ if merge_checkbox:
589
+ try:
590
+ merged_text = "\n\n".join(transcripts)
591
+ word_file_path = save_as_word(merged_text)
592
+ log.append(f"Merged transcript saved: {word_file_path}")
593
+ except Exception as e:
594
+ log.append(f"Failed to save merged file: {e}")
595
+ word_file_path = None
596
 
597
+ # final yield
598
+ yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
599
 
600
+ # cleanup extracted dir
601
+ try:
602
+ if os.path.exists(temp_extract_dir):
603
+ shutil.rmtree(temp_extract_dir)
604
+ log.append("Cleaned temporary extraction dir.")
605
+ except Exception:
606
+ pass
607
 
608
 
609
+ # ----------------------- Gradio UI -----------------------
610
def run_transcription_wrapper(
    files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options_state
):
    """Gradio click handler: normalise the inputs and stream transcription updates.

    This is a generator function (``yield from``) rather than a plain function
    returning a generator object. Gradio only streams intermediate results
    when the handler itself is a generator function; a returned generator
    object would be treated as a single output value.

    Args:
        files: Uploaded audio file(s), forwarded unchanged.
        model_name: Whisper model name.
        merge: Whether to produce the merged .docx.
        zip_file: Optional zip upload; may be a path, an object with a
            ``name`` attribute, or a dict with a ``"name"`` key.
        zip_password: Optional zip password.
        enable_memory: Whether memory-based correction is enabled.
        advanced_options_state: Accepted for the UI wiring but not forwarded;
            an empty options dict is always passed on.

    Yields:
        Whatever ``transcribe_multiple`` yields.
    """
    zip_path = None
    if zip_file:
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = str(zip_file)
        elif hasattr(zip_file, "name"):
            zip_path = zip_file.name
        elif isinstance(zip_file, dict) and zip_file.get("name"):
            zip_path = zip_file["name"]
    adv = {}
    yield from transcribe_multiple(
        files,
        model_name,
        adv,
        merge_checkbox=merge,
        zip_file=zip_path,
        zip_password=zip_password,
        enable_memory=enable_memory,
    )
632
+
633
+
634
print("DEBUG: building Gradio Blocks", flush=True)

# The whole interface is declared inside a single Blocks context bound to the
# module-level name `demo`.
with gr.Blocks() as demo:
    gr.Markdown("## Whisper Transcription (Spaces-ready)")
    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column(scale=2):
            file_input = gr.File(
                label="Upload audio files (or zip)",
                file_count="multiple",
                type="filepath",
            )
            zip_input = gr.File(
                label="Optional: Upload zip file containing audio",
                file_count="single",
                type="filepath",
            )
            zip_password = gr.Textbox(
                label="Zip password (if any)",
                placeholder="password (optional)",
            )
            model_select = gr.Dropdown(
                choices=["small", "medium", "large", "base"],
                value="small",
                label="Whisper model",
            )
            merge_checkbox = gr.Checkbox(
                label="Merge transcripts to a single .docx (downloadable)",
                value=True,
            )
            memory_checkbox = gr.Checkbox(
                label="Enable persistent memory (word/phrase correction)",
                value=False,
            )
            submit = gr.Button("Transcribe")
        # Right column: the four outputs of the click handler.
        with gr.Column(scale=3):
            logs = gr.Textbox(label="Logs (streaming)", lines=12)
            transcripts_out = gr.Textbox(label="Transcripts (streaming)", lines=12)
            download_file = gr.File(label="Merged .docx (when enabled)")
            progress_num = gr.Number(value=0, label="Progress (%)")

    # Input order must match the parameter order of run_transcription_wrapper;
    # the trailing State supplies its advanced_options_state argument.
    submit.click(
        fn=run_transcription_wrapper,
        inputs=[
            file_input,
            model_select,
            merge_checkbox,
            zip_input,
            zip_password,
            memory_checkbox,
            gr.State({}),
        ],
        outputs=[logs, transcripts_out, download_file, progress_num],
    )
688
+
689
+ # ---------- Launch ----------
690
  if __name__ == "__main__":
691
  port = int(os.environ.get("PORT", 7860))
692
  print("DEBUG: launching Gradio on port", port, flush=True)
 
696
  print("FATAL: demo.launch failed:", e, flush=True)
697
  traceback.print_exc()
698
  raise