staraks committed on
Commit
68f2294
·
verified ·
1 Parent(s): 97cd142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1141 -0
app.py CHANGED
@@ -1,3 +1,1116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
3
  # Clean, single-version file for Hugging Face Spaces.
@@ -934,3 +2047,31 @@ if __name__ == "__main__":
934
  print("FATAL: demo.launch failed:", e, flush=True)
935
  traceback.print_exc()
936
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Skip to content
2
+ Chat history
3
+
4
+ You said:
5
+ # app.py
6
+ # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
7
+ # Clean, single-version file for Hugging Face Spaces.
8
+
9
+ import os
10
+ import sys
11
+ import json
12
+ import shutil
13
+ import tempfile
14
+ import subprocess
15
+ import traceback
16
+ import threading
17
+ import re
18
+ from difflib import get_close_matches
19
+
20
+ # Force unbuffered output so container logs show prints immediately
21
+ os.environ["PYTHONUNBUFFERED"] = "1"
22
+
23
+ print("DEBUG: app.py bootstrap starting", flush=True)
24
+
25
+ # Third-party imports (must be installed in the environment)
26
+ try:
27
+ from docx import Document
28
+ import whisper
29
+ import gradio as gr
30
+ import pyzipper
31
+ from pydub import AudioSegment
32
+ except Exception as e:
33
+ print("FATAL: import error for third-party libs:", e, flush=True)
34
+ traceback.print_exc()
35
+ raise
36
+
37
+ print("DEBUG: imports OK", flush=True)
38
+
39
+ # ---------- Config ----------
40
+ MEMORY_FILE = "memory.json"
41
+ MEMORY_LOCK = threading.Lock()
42
+ MIN_WAV_SIZE = 200 # bytes
43
+
44
+ # Small ffmpeg fallback grid (hybrid conversion)
45
+ FFMPEG_CANDIDATES = [
46
+ ("s16le", 16000, 1),
47
+ ("s16le", 44100, 2),
48
+ ("pcm_s16le", 16000, 1),
49
+ ("pcm_s16le", 44100, 2),
50
+ ("mulaw", 8000, 1),
51
+ ]
52
+ # ----------------------------
53
+
54
+ # ---------- Memory helpers ----------
55
def load_memory():
    """Load the correction memory from ``MEMORY_FILE``.

    Returns:
        dict: A mapping that always contains dict-valued ``"words"`` and
        ``"phrases"`` sections.

    A file that is missing, unreadable, not valid JSON, or whose top level
    is not a JSON object is replaced (best effort) by a fresh empty memory.
    A JSON object that lacks a section, or holds a non-dict there, gets that
    section reset to ``{}`` so later ``memory["words"]`` lookups cannot fail.
    """
    try:
        with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
            loaded = json.load(fh)
    except (OSError, ValueError, RecursionError):
        # Missing file, permission problem, bad encoding or malformed JSON.
        loaded = None

    if isinstance(loaded, dict):
        for section in ("words", "phrases"):
            if not isinstance(loaded.get(section), dict):
                loaded[section] = {}
        return loaded

    mem = {"words": {}, "phrases": {}}
    try:
        with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
            json.dump(mem, fh, ensure_ascii=False, indent=2)
    except OSError:
        # Read-only filesystem etc.: keep working with the in-memory copy.
        pass
    return mem
69
+
70
+
71
def save_memory(mem):
    """Persist *mem* to ``MEMORY_FILE`` as pretty-printed UTF-8 JSON.

    The data is written to a sibling temporary file and then moved over the
    real file with ``os.replace``, so an interrupted write can never leave a
    truncated ``MEMORY_FILE`` behind (a truncated file would be treated as
    "no memory" on the next start).

    Args:
        mem: JSON-serialisable memory mapping.

    Raises:
        OSError: If the file cannot be written or replaced.
        TypeError: If *mem* is not JSON-serialisable.
    """
    tmp_path = MEMORY_FILE + ".tmp"
    with MEMORY_LOCK:
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(mem, fh, ensure_ascii=False, indent=2)
        os.replace(tmp_path, MEMORY_FILE)
75
+
76
+
77
+ memory = load_memory()
78
+ print(
79
+ "DEBUG: memory loaded (words=%d phrases=%d)"
80
+ % (len(memory.get("words", {})), len(memory.get("phrases", {}))),
81
+ flush=True,
82
+ )
83
+
84
+ # ---------- Postprocessing ----------
85
+ MEDICAL_ABBREVIATIONS = {
86
+ "pt": "patient",
87
+ "dx": "diagnosis",
88
+ "hx": "history",
89
+ "sx": "symptoms",
90
+ "c/o": "complains of",
91
+ "bp": "blood pressure",
92
+ "hr": "heart rate",
93
+ "o2": "oxygen",
94
+ "r/o": "rule out",
95
+ "adm": "admit",
96
+ "disch": "discharge",
97
+ }
98
+
99
+ DRUG_NORMALIZATION = {
100
+ "metformin": "Metformin",
101
+ "aspirin": "Aspirin",
102
+ "amoxicillin": "Amoxicillin",
103
+ }
104
+
105
+
106
def expand_abbreviations(text, abbreviations=None):
    """Replace known abbreviations in *text* with their expansions.

    The text is split on whitespace runs (which are preserved verbatim).
    Each token is compared case-insensitively, ignoring any ``.,;:``
    characters at either end, against the abbreviation table. Punctuation on
    both sides of a matched token is kept around the expansion.

    Args:
        text: Input string.
        abbreviations: Optional mapping of lower-case abbreviation to
            expansion. Defaults to ``MEDICAL_ABBREVIATIONS``.

    Returns:
        str: The text with abbreviations expanded.
    """
    table = MEDICAL_ABBREVIATIONS if abbreviations is None else abbreviations
    edge_chars = ".,;:"
    pieces = []
    for token in re.split(r"(\s+)", text):
        core = token.strip(edge_chars)
        expansion = table.get(core.lower())
        if expansion is None:
            pieces.append(token)
            continue
        # Re-attach whatever punctuation surrounded the abbreviation.
        lead = token[: len(token) - len(token.lstrip(edge_chars))]
        trail = token[len(token.rstrip(edge_chars)):]
        pieces.append(lead + expansion + trail)
    return "".join(pieces)
120
+
121
+
122
def normalize_drugs(text, drug_names=None):
    """Rewrite known drug names in *text* to their canonical spelling.

    Matching is case-insensitive and restricted to whole words. Keys are
    regex-escaped and the canonical form is inserted literally, so names
    containing regex metacharacters or backslashes are handled safely.

    Args:
        text: Input string.
        drug_names: Optional mapping of lower-case name to canonical
            spelling. Defaults to ``DRUG_NORMALIZATION``.

    Returns:
        str: The text with drug names normalised.
    """
    table = DRUG_NORMALIZATION if drug_names is None else drug_names
    for raw_name, canonical in table.items():
        pattern = rf"\b{re.escape(raw_name)}\b"
        # A callable replacement avoids interpreting `canonical` as a template.
        text = re.sub(
            pattern, lambda _m, c=canonical: c, text, flags=re.IGNORECASE
        )
    return text
126
+
127
+
128
def punctuation_and_capitalization(text):
    """Ensure terminal punctuation and capitalise the start of each sentence.

    The text is stripped; if it does not already end in ``.``, ``?`` or
    ``!`` a full stop is appended. Only the first character of each sentence
    is upper-cased; the rest is left untouched so that proper nouns,
    acronyms and normalised drug names keep their casing.

    Args:
        text: Input string.

    Returns:
        str: The cleaned text (``""`` for blank input).
    """
    text = text.strip()
    if not text:
        return text
    if not re.search(r"[.?!]\s*$", text):
        text += "."
    # With one capture group, odd indices hold the separators.
    pieces = re.split(r"([.?!]\s+)", text)
    return "".join(
        piece if idx % 2 else piece[:1].upper() + piece[1:]
        for idx, piece in enumerate(pieces)
    )
142
+
143
+
144
def postprocess_transcript(text, format_soap=False):
    """Clean a raw transcript and optionally lay it out as a SOAP note.

    Whitespace is collapsed, then abbreviations are expanded, drug names
    normalised, and punctuation/capitalisation applied, in that order.

    Args:
        text: Raw transcript; falsy values are returned unchanged.
        format_soap: When true, return a four-line ``S:/O:/A:/P:`` note
            built from the first two sentences instead of the cleaned text.

    Returns:
        The cleaned transcript, or the SOAP-formatted string.
    """
    if not text:
        return text

    cleaned = re.sub(r"\s+", " ", text).strip()
    for step in (
        expand_abbreviations,
        normalize_drugs,
        punctuation_and_capitalization,
    ):
        cleaned = step(cleaned)

    if not format_soap:
        return cleaned

    parts = re.split(r"(?<=[.?!])\s+", cleaned)
    subjective = parts[0] if parts else ""
    objective = parts[1] if len(parts) > 1 else ""

    # The assessment line is filled only when a diagnostic keyword appears.
    lowered = cleaned.lower()
    triggers = ("diagnosis", "dx", "rule out", "r/o", "probable")
    if any(word in lowered for word in triggers):
        assessment = "Assessment: " + subjective
    else:
        assessment = ""

    return (
        f"S: {subjective}\nO: {objective}\nA: {assessment}\nP: Plan: follow up as indicated."
    )
165
+
166
+
167
+ # ---------- Memory utilities ----------
168
def extract_words_and_phrases(text):
    """Split *text* into word tokens and sentence-level phrases.

    Words are runs of ASCII letters, digits, hyphens and apostrophes.
    Phrases are the non-empty, stripped pieces obtained by splitting after
    ``.``, ``?`` or ``!`` followed by whitespace.

    Returns:
        tuple[list[str], list[str]]: ``(words, phrases)``.
    """
    phrases = []
    for chunk in re.split(r"(?<=[.?!])\s+", text):
        trimmed = chunk.strip()
        if trimmed:
            phrases.append(trimmed)

    tokens = re.findall(r"[A-Za-z0-9\-']+", text)
    return tokens, phrases
175
+
176
+
177
def update_memory_with_transcript(transcript):
    """Count the words and phrases of *transcript* into the global memory.

    Words are counted case-insensitively (lower-cased keys); phrases are
    counted verbatim. When anything was counted, the memory is written back
    to ``MEMORY_FILE``. Persistence is best effort: a failed write is
    reported on stdout but never raised, and the in-process counts are kept.

    Args:
        transcript: Final transcript text to learn from.
    """
    words, sentences = extract_words_and_phrases(transcript)
    if not words and not sentences:
        return

    with MEMORY_LOCK:
        # setdefault guards against a memory dict that lacks a section.
        word_counts = memory.setdefault("words", {})
        phrase_counts = memory.setdefault("phrases", {})

        for word in words:
            key = word.lower()
            word_counts[key] = word_counts.get(key, 0) + 1
        for sentence in sentences:
            key = sentence.strip()
            phrase_counts[key] = phrase_counts.get(key, 0) + 1

        # Written inline rather than via save_memory(): that helper takes
        # MEMORY_LOCK itself and the lock is not re-entrant.
        try:
            with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
                json.dump(memory, fh, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as exc:
            print(f"WARNING: could not persist memory: {exc}", flush=True)
202
+
203
+
204
def memory_correct_text(text, min_ratio=0.85, mem=None):
    """Nudge *text* towards words and phrases already seen in the memory.

    Each ASCII word token not present in the memory is replaced by its
    closest known word when the similarity is at least *min_ratio*. Tokens
    containing a digit are never altered, because fuzzy-matching numbers
    (doses, vitals) could silently change their value. Afterwards, known
    phrases of eight or more characters found case-insensitively in the text
    are rewritten with the stored casing, longest phrase first.

    Args:
        text: Text to correct; falsy values are returned unchanged.
        min_ratio: ``difflib`` similarity cutoff in ``[0, 1]``.
        mem: Optional memory mapping with ``"words"`` and ``"phrases"``
            sections. Defaults to the module-level ``memory``.

    Returns:
        The corrected text.
    """
    if not text:
        return text
    store = memory if mem is None else mem
    known_words = store.get("words") or {}
    known_phrases = store.get("phrases") or {}
    if not known_words and not known_phrases:
        return text

    vocabulary = list(known_words)

    def fix_word(word):
        lowered = word.lower()
        if lowered in known_words or any(ch.isdigit() for ch in word):
            return word
        matches = get_close_matches(lowered, vocabulary, n=1, cutoff=min_ratio)
        if not matches:
            return word
        best = matches[0]
        return best.capitalize() if word[0].isupper() else best

    word_re = re.compile(r"^[A-Za-z0-9\-']+$")
    corrected = "".join(
        fix_word(tok) if word_re.match(tok) else tok
        for tok in re.split(r"(\W+)", text)
    )

    for phrase in sorted(known_phrases, key=len, reverse=True):
        low_phrase = phrase.lower()
        if len(low_phrase) < 8:
            continue
        if low_phrase in corrected.lower():
            # Callable replacement: the phrase is inserted literally, so
            # backslashes in it are not parsed as template escapes.
            corrected = re.sub(
                re.escape(phrase),
                lambda _m, p=phrase: p,
                corrected,
                flags=re.IGNORECASE,
            )
    return corrected
240
+
241
+
242
+ # ---------- File utilities ----------
243
def save_as_word(text, filename=None):
    """Write *text* as a single paragraph into a ``.docx`` file.

    Args:
        text: Document body.
        filename: Destination path. When omitted, the file is named
            ``merged_transcripts.docx`` inside a freshly created private
            temporary directory, so concurrent requests never overwrite (or
            receive) each other's document.

    Returns:
        str: Path of the saved document.
    """
    if filename is None:
        out_dir = tempfile.mkdtemp(prefix="transcripts_")
        filename = os.path.join(out_dir, "merged_transcripts.docx")
    doc = Document()
    doc.add_paragraph(text)
    doc.save(filename)
    return filename
252
+
253
+
254
+ # ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
255
+ def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
256
+ cmd = [
257
+ "ffmpeg",
258
+ "-hide_banner",
259
+ "-loglevel",
260
+ "error",
261
+ "-y",
262
+ "-f",
263
+ fmt,
264
+ "-ar",
265
+ str(sr),
266
+ "-ac",
267
+ str(ch),
268
+ "-i",
269
+ input_path,
270
+ out_path,
271
+ ]
272
+ try:
273
+ proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
274
+ if (
275
+ proc.returncode == 0
276
+ and os.path.exists(out_path)
277
+ and os.path.getsize(out_path) > MIN_WAV_SIZE
278
+ ):
279
+ return True, proc.stderr + proc.stdout
280
+ else:
281
+ try:
282
+ if os.path.exists(out_path):
283
+ os.unlink(out_path)
284
+ except Exception:
285
+ pass
286
+ return False, proc.stderr + proc.stdout
287
+ except Exception as e:
288
+ try:
289
+ if os.path.exists(out_path):
290
+ os.unlink(out_path)
291
+ except Exception:
292
+ pass
293
+ return False, str(e)
294
+
295
+
296
def convert_to_wav_if_needed(input_path):
    """Return a path to a WAV rendition of *input_path*.

    Paths ending in ``.wav`` (any case) are returned as-is. Otherwise pydub
    is tried first; if that fails or yields a file no larger than
    ``MIN_WAV_SIZE`` bytes, every entry of ``FFMPEG_CANDIDATES`` is tried
    through ``_ffmpeg_convert``. A diagnostics file is written to a fresh
    temporary directory in both the fallback-success and the failure case.

    Raises:
        Exception: When no attempt produced a usable WAV file, or when the
            final diagnostics file could not be written.
    """
    input_path = str(input_path)
    if input_path.lower().endswith(".wav"):
        return input_path

    def _drop(path):
        # Best-effort removal of a partial output file.
        try:
            if path and os.path.exists(path):
                os.unlink(path)
        except Exception:
            pass

    # ---- attempt 1: pydub ----
    auto_err = ""
    pydub_out = None
    try:
        handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        pydub_out = handle.name
        handle.close()
        AudioSegment.from_file(input_path).export(pydub_out, format="wav")
        if os.path.exists(pydub_out) and os.path.getsize(pydub_out) > MIN_WAV_SIZE:
            return pydub_out
        _drop(pydub_out)
    except Exception:
        auto_err = traceback.format_exc()
        _drop(pydub_out)

    # ---- attempt 2: ffmpeg candidate grid ----
    diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
    diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
    diagnostics = []
    for fmt, sr, ch in FFMPEG_CANDIDATES:
        handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        handle.close()
        candidate = handle.name
        success, debug = _ffmpeg_convert(input_path, candidate, fmt, sr, ch)
        diagnostics.append(
            f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n"
        )
        if not success:
            _drop(candidate)
            continue
        try:
            with open(diag_log, "w", encoding="utf-8") as fh:
                fh.write("pydub auto error:\n")
                fh.write(auto_err + "\n\n")
                fh.write("Successful ffmpeg candidate:\n")
                fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
                fh.write("Diagnostics:\n")
                fh.write("\n".join(diagnostics))
        except Exception:
            pass
        return candidate

    # ---- everything failed: gather extra diagnostics ----
    try:
        probe = subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
            capture_output=True,
            text=True,
            timeout=10,
        )
        diagnostics.append(
            "FFPROBE:\n" + (probe.stdout.strip() or probe.stderr.strip())
        )
    except Exception as e:
        diagnostics.append("ffprobe failed: " + str(e))

    try:
        with open(input_path, "rb") as fh:
            head = fh.read(512)
        diagnostics.append("HEX PREVIEW:\n" + head.hex())
    except Exception as e:
        diagnostics.append("could not read head: " + str(e))

    try:
        with open(diag_log, "w", encoding="utf-8") as fh:
            fh.write("pydub auto error:\n")
            fh.write(auto_err + "\n\n")
            fh.write("Full diagnostics:\n\n")
            fh.write("\n\n".join(diagnostics))
    except Exception as e:
        raise Exception(f"Conversion failed; diagnostics write error: {e}")

    raise Exception(
        f"Could not convert file to WAV. Diagnostics saved to: {diag_log}"
    )
383
+
384
+
385
+ # ---------- Whisper model cache ----------
386
+ MODEL_CACHE = {}
387
+
388
+
389
def get_whisper_model(name):
    """Return the Whisper model *name*, loading it on first request.

    Loaded models are kept in ``MODEL_CACHE`` keyed by name, so each model
    is loaded at most once per process. A failed load caches nothing.
    """
    try:
        return MODEL_CACHE[name]
    except KeyError:
        pass
    print(f"DEBUG: loading whisper model '{name}'", flush=True)
    loaded = whisper.load_model(name)
    MODEL_CACHE[name] = loaded
    return MODEL_CACHE[name]
394
+
395
+
396
+ # ---------- Main transcription generator ----------
397
def _extract_audio_from_zip(zip_file, zip_password, dest_dir, log):
    """Extract the supported audio members of *zip_file* into *dest_dir*.

    Progress and per-member errors (including a wrong password, which only
    surfaces when a member is read) are appended to *log*.

    Returns:
        list[str]: Paths of the files actually written, as reported by the
        archive library (member names are sanitised during extraction, so
        the path must not be re-derived from ``info.filename``).
    """
    exts = (".mp3", ".wav", ".aac", ".flac", ".ogg", ".m4a", ".dat", ".dct")
    extracted = []
    with pyzipper.ZipFile(zip_file, "r") as zf:
        if zip_password:
            zf.setpassword(zip_password.encode())
        for info in zf.infolist():
            if info.is_dir():
                continue
            if os.path.splitext(info.filename)[1].lower() not in exts:
                continue
            try:
                target = zf.extract(info, path=dest_dir)
            except Exception as e:
                log.append(f"Error extracting {info.filename}: {e}")
                continue
            if os.path.exists(target):
                extracted.append(target)
                log.append(f"Extracted: {info.filename}")
    return extracted


def transcribe_multiple(
    audio_files,
    model_name,
    advanced_options,
    merge_checkbox,
    zip_file=None,
    zip_password=None,
    enable_memory=False,
):
    """Transcribe uploaded audio files and/or the audio inside a ZIP.

    Generator yielding ``(log_text, transcripts_text, merged_docx_path_or_None,
    percent_int)`` after every step so the UI can stream progress.

    Args:
        audio_files: A path, or a list/tuple of paths, or a falsy value.
        model_name: Whisper model name passed to ``get_whisper_model``.
        advanced_options: Dict of extra keyword arguments for
            ``model.transcribe``; any non-dict value is ignored.
        merge_checkbox: When true, all transcripts are saved to one ``.docx``.
        zip_file: Optional path of a ZIP archive containing audio.
        zip_password: Optional password for the archive.
        enable_memory: Apply and update the correction memory.

    ZIP members are extracted into a private per-call temporary directory
    that is removed when the generator finishes or is closed, so concurrent
    runs cannot delete each other's files.
    """
    log = []
    transcripts = []
    word_file_path = None
    temp_extract_dir = None

    def snapshot(download, percent):
        # One UI update built from the current log and transcript state.
        return "\n\n".join(log), "\n\n".join(transcripts), download, percent

    # initial yield
    yield "", "", None, 0

    try:
        paths = []

        # ---- ZIP input ----
        if zip_file:
            log.append(f"Processing zip: {zip_file}")
            yield snapshot(None, 2)
            zip_error = None
            try:
                temp_extract_dir = tempfile.mkdtemp(prefix="extracted_audio_")
                paths.extend(
                    _extract_audio_from_zip(
                        zip_file, zip_password, temp_extract_dir, log
                    )
                )
            except pyzipper.BadZipFile:
                zip_error = "Invalid zip file."
            except Exception as e:
                zip_error = f"Zip processing error: {e}"
            else:
                if not paths:
                    zip_error = "No supported audio in zip."
            if zip_error:
                log.append(zip_error)
                yield snapshot(None, 100)
                return

        # ---- directly uploaded files ----
        if isinstance(audio_files, (list, tuple)):
            paths.extend(a for a in audio_files if a)
        elif isinstance(audio_files, str) and audio_files:
            paths.append(audio_files)

        if not paths:
            log.append("No audio files provided.")
            yield snapshot(None, 100)
            return

        # ---- model ----
        yield snapshot(None, 5)
        try:
            model = get_whisper_model(model_name)
            log.append(f"Loaded Whisper model: {model_name}")
        except Exception as e:
            log.append(f"Failed to load model {model_name}: {e}")
            yield snapshot(None, 100)
            return

        whisper_opts = (
            dict(advanced_options) if isinstance(advanced_options, dict) else {}
        )
        total = len(paths)

        for idx, p in enumerate(paths, start=1):
            log.append(f"Processing file ({idx}/{total}): {p}")
            yield snapshot(None, int(5 + (idx - 1) * 80 / max(1, total)))
            base = os.path.basename(p)

            try:
                wav = convert_to_wav_if_needed(p)
            except Exception as e:
                log.append(f"Conversion failed for {p}: {e}")
                transcripts.append(
                    f"FILE: {base}\nERROR: Conversion failed: {e}"
                )
                yield snapshot(None, int(5 + idx * 80 / max(1, total)))
                continue
            log.append(f"Converted to WAV: {wav}")

            try:
                result = model.transcribe(wav, **whisper_opts)
                text = result.get("text", "").strip()
                log.append(f"Transcribed: {len(text)} chars")

                if enable_memory:
                    text = memory_correct_text(text)
                text = postprocess_transcript(text)
                transcripts.append(f"FILE: {base}\n{text}\n")

                if enable_memory:
                    try:
                        update_memory_with_transcript(text)
                        log.append("Memory updated.")
                    except Exception as e:
                        # Learning is optional; report but keep the transcript.
                        log.append(f"Memory update failed: {e}")
            except Exception as e:
                log.append(f"Transcription failed for {p}: {e}")
                transcripts.append(
                    f"FILE: {base}\nERROR: Transcription failed: {e}"
                )
            finally:
                # Remove the intermediate WAV only when conversion created
                # one; a source that was already WAV is returned unchanged.
                try:
                    if wav and wav != str(p) and os.path.exists(wav):
                        os.unlink(wav)
                except OSError:
                    pass

            yield snapshot(None, int(10 + idx * 85 / max(1, total)))

        # ---- optional merged document ----
        if merge_checkbox:
            try:
                word_file_path = save_as_word("\n\n".join(transcripts))
                log.append(f"Merged transcript saved: {word_file_path}")
            except Exception as e:
                log.append(f"Failed to save merged file: {e}")
                word_file_path = None

        # final yield
        yield snapshot(word_file_path, 100)
    finally:
        # Runs on normal completion, early return and generator close alike.
        if temp_extract_dir:
            shutil.rmtree(temp_extract_dir, ignore_errors=True)
611
+
612
+
613
+ # ----------------------- Gradio wrapper (streaming) -----------------------
614
def run_transcription_wrapper(
    files,
    model_name,
    merge,
    zip_file,
    zip_password,
    use_default_zip_pass,
    default_zip_password,
    enable_memory,
    advanced_options_state,
):
    """Gradio click handler: stream updates from ``transcribe_multiple``.

    This is a generator on purpose, so that Gradio receives one 4-tuple
    ``(logs, transcripts, docx_path, percent)`` per update rather than a
    generator object. Any exception is converted into a final update that
    carries the traceback in the log field.
    """

    def _resolve_zip(upload):
        # Accept a path, an object exposing ``.name``, or a dict with "name".
        if not upload:
            return None
        if isinstance(upload, (str, os.PathLike)):
            return str(upload)
        if hasattr(upload, "name"):
            return upload.name
        if isinstance(upload, dict) and upload.get("name"):
            return upload["name"]
        return None

    try:
        audio_input = files
        zip_path = _resolve_zip(zip_file)

        # The default password applies only when its checkbox is ticked and
        # the override field is empty or whitespace.
        final_zip_password = (
            default_zip_password
            if use_default_zip_pass and not (zip_password and zip_password.strip())
            else zip_password
        )

        adv = {}  # placeholder for future advanced options

        updates = transcribe_multiple(
            audio_input,
            model_name,
            adv,
            merge_checkbox=merge,
            zip_file=zip_path,
            zip_password=final_zip_password,
            enable_memory=enable_memory,
        )
        for log_text, transcript_text, docx_path, pct in updates:
            yield log_text, transcript_text, docx_path, pct

    except Exception:
        tb = traceback.format_exc()
        yield (
            f"EXCEPTION in run_transcription_wrapper:\n{tb}",
            "ERROR: transcription did not start or failed unexpectedly.",
            None,
            100,
        )
668
+
669
+
670
+ print("DEBUG: building Gradio Blocks", flush=True)
671
+
672
+ # ---------- Minimal, easy-to-use UI ----------
673
# Two-column layout: inputs on the left, streamed outputs on the right.
with gr.Blocks(title="Whisper Transcriber") as demo:
    gr.Markdown(
        "## Whisper Transcriber\n"
        "Upload audio files or a ZIP on the left and click **Transcribe**.\n"
        "Transcript, progress, download, and logs appear on the right."
    )

    with gr.Row():
        # LEFT: Inputs (small boxes, simple controls)
        with gr.Column(scale=1):
            gr.Markdown("### Input")

            file_input = gr.File(
                label="Audio files",
                file_count="multiple",
                type="filepath",
                height=60,  # small box
            )

            zip_input = gr.File(
                label="ZIP with audio (optional)",
                file_count="single",
                type="filepath",
                height=60,  # small box
            )

            use_default_zip_pass = gr.Checkbox(
                label="Use default ZIP password",
                value=False,
            )

            # NOTE(security): a password literal in source control is visible
            # to anyone who can read the repository or load the page. Prefer
            # setting DEFAULT_ZIP_PASSWORD in the environment; the literal is
            # kept only as a backward-compatible fallback.
            default_zip_password = gr.Textbox(
                label="Default ZIP password",
                value=os.environ.get("DEFAULT_ZIP_PASSWORD", "dietcoke1"),
                type="password",  # mask on screen
                interactive=True,
            )

            zip_password = gr.Textbox(
                label="ZIP password (override)",
                # The default is applied only when the checkbox above is set.
                placeholder=(
                    "If empty and 'Use default ZIP password' is ticked, "
                    "the default password is used"
                ),
                type="password",  # mask on screen
            )

            model_select = gr.Dropdown(
                choices=["small", "medium", "large", "base"],
                value="small",
                label="Whisper model",
            )

            merge_checkbox = gr.Checkbox(
                label="Merge all transcripts into one .docx",
                value=True,
            )

            memory_checkbox = gr.Checkbox(
                label="Enable correction memory",
                value=False,
            )

            submit = gr.Button("Transcribe", variant="primary")

        # RIGHT: Outputs (Transcript -> Progress -> Download -> Logs)
        with gr.Column(scale=1):
            gr.Markdown("### Output")

            transcripts_out = gr.Textbox(
                label="Transcript",
                lines=18,
                interactive=False,
            )

            progress_num = gr.Slider(
                minimum=0,
                maximum=100,
                value=0,
                step=1,
                label="Progress (%)",
                interactive=False,
            )

            download_file = gr.File(
                label="Merged .docx (when available)"
            )

            logs = gr.Textbox(
                label="Logs",
                lines=10,
                interactive=False,
            )

    submit.click(
        fn=run_transcription_wrapper,
        inputs=[
            file_input,
            model_select,
            merge_checkbox,
            zip_input,
            zip_password,
            use_default_zip_pass,
            default_zip_password,
            memory_checkbox,
            gr.State({}),
        ],
        # The function yields: logs, transcripts, download_file, progress
        outputs=[logs, transcripts_out, download_file, progress_num],
    )
778
+
779
+ # ---------- Launch ----------
780
if __name__ == "__main__":
    # Hosting platforms pass the listening port via PORT; 7860 is the fallback.
    listen_port = int(os.environ.get("PORT", 7860))
    print("DEBUG: launching Gradio on port", listen_port, flush=True)
    try:
        queued_app = demo.queue()
        queued_app.launch(server_name="0.0.0.0", server_port=listen_port)
    except Exception as launch_error:
        # Log the failure before re-raising so it shows in container logs.
        print("FATAL: demo.launch failed:", launch_error, flush=True)
        traceback.print_exc()
        raise
789
+ ChatGPT said:
790
+
791
+ Thought for 7s
792
+ Nice, thanks — this is a solid, well-structured transcription app. I ran a careful code review and made a few targeted fixes and improvements you can drop into your file to make it more robust and reliable (especially around ffmpeg fallback conversion, ZIP password handling, temp-file cleanup, and a couple of small safety/usability items).
793
+
794
+ Below I list the important issues I found, why they matter, and then provide the concrete patched code snippets you can replace in your file. I kept changes minimal and backward-compatible.
795
+
796
+ Major issues & fixes (summary)
797
+ ffmpeg conversion command was fragile
798
+ You used -f <fmt> before -i which tells ffmpeg the input format is raw (s16le/pcm), and that often fails for containerized inputs. Better: call ffmpeg -i input -ar <sr> -ac <ch> out.wav (let ffmpeg auto-detect input). If you still want raw-format attempts, do them explicitly only when necessary. The patch changes the command so ffmpeg autodetects input and sets output sampling/channel only.
799
+
800
+ Diagnostics/logging & cleanup
801
+ If conversion fails we should remove partial files and write diagnostics safely. I preserved your diagnostics but improved removal and error messages.
802
+
803
+ ZIP password handling with pyzipper
804
+ zf.setpassword(...) simply sets a default password — it does not throw on set. Extraction will raise if the password is wrong. I changed the code to attempt the extraction and catch RuntimeError/pyzipper.BadZipFile where appropriate, and to produce a clearer "Wrong password" message.
805
+
806
+ Temporary WAV deletion logic
807
+ The logic that deletes intermediate WAV relied on checking os.path.commonpath([tmpdir, os.path.abspath(wav)]) == tmpdir — that may raise if paths are on different drives. I hardened that check and wrapped in try/except.
808
+
809
+ Model loading
810
+ I kept your model cache but suggested an optional device override if you want to put it on CPU/GPU. (Small note: confirm whisper.load_model(name) is the right API for the whisper package/version you are using.)
811
+
812
+ Minor UI suggestion
813
+ Consider setting file_input's type="file" or keeping "filepath" with a fallback handler — your run_transcription_wrapper already copes with zip_file being file-like or path-like, so it's fine. (Left unchanged.)
814
+
815
+ Replace these functions with the patched versions
816
+ Paste these function definitions into your file to replace the originals for _ffmpeg_convert, convert_to_wav_if_needed, and the ZIP extraction block in transcribe_multiple. I preserved your variable names and diagnostics as much as possible.
817
+
818
+ # ---------- improved ffmpeg convert ----------
819
+ def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
820
+ """
821
+ Use ffmpeg to convert input_path -> out_path.
822
+ We let ffmpeg autodetect input format (so don't force -f except if fmt == 'raw').
823
+ If fmt is one of the raw formats (s16le/pcm_s16le/mulaw) we treat it as raw input;
824
+ otherwise we let ffmpeg autodetect and only set sample rate/channels on output.
825
+ """
826
+ try:
827
+ cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
828
+
829
+ # if fmt indicates raw input format, we need to tell ffmpeg about it
830
+ if fmt in ("s16le", "pcm_s16le", "mulaw"):
831
+ # raw PCM input requires -f and input format description before -i
832
+ cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path]
833
+ # output will be WAV (ffmpeg determines container from out_path)
834
+ cmd += [out_path]
835
+ else:
836
+ # For most candidate attempts, let ffmpeg autodetect input format.
837
+ # Request output sample rate/channels explicitly.
838
+ cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
839
+
840
+ proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
841
+ stdout_stderr = proc.stdout + proc.stderr
842
+ if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
843
+ return True, stdout_stderr
844
+ else:
845
+ # remove partial output if exists
846
+ try:
847
+ if os.path.exists(out_path):
848
+ os.unlink(out_path)
849
+ except Exception:
850
+ pass
851
+ return False, stdout_stderr
852
+ except Exception as e:
853
+ try:
854
+ if os.path.exists(out_path):
855
+ os.unlink(out_path)
856
+ except Exception:
857
+ pass
858
+ return False, str(e)
859
+
860
+
861
def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        if path and os.path.exists(path):
            os.unlink(path)
    except OSError:
        pass


def _new_temp_wav():
    """Reserve a unique temporary ``.wav`` file and return its path."""
    handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    handle.close()
    return handle.name


def _probe_unconvertible_input(input_path):
    """Return ffprobe output and a hex preview of *input_path* as text notes."""
    notes = []
    try:
        fp = subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
            capture_output=True,
            text=True,
            timeout=10,
        )
        notes.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
    except Exception as e:
        notes.append("ffprobe failed: " + str(e))
    try:
        with open(input_path, "rb") as fh:
            head = fh.read(512)
        notes.append("HEX PREVIEW:\n" + head.hex())
    except Exception as e:
        notes.append("could not read head: " + str(e))
    return notes


def convert_to_wav_if_needed(input_path):
    """Return a path to a WAV rendition of *input_path*.

    Paths ending in ``.wav`` (any case) are returned unchanged. Otherwise
    pydub is tried first; if it fails or yields a file no larger than
    ``MIN_WAV_SIZE`` bytes, each entry of ``FFMPEG_CANDIDATES`` is tried via
    ``_ffmpeg_convert``.

    A diagnostics file is written only when every attempt failed, and its
    path is included in the raised message. Nothing is written on success,
    so successful fallbacks do not leave unreferenced temporary directories
    behind.

    Raises:
        Exception: When no attempt produced a usable WAV, or when the
            diagnostics file could not be written (chained to the cause).
    """
    input_path = str(input_path)
    if input_path.lower().endswith(".wav"):
        return input_path

    # Attempt 1: let pydub detect the format.
    auto_err = ""
    wav_path = None
    try:
        wav_path = _new_temp_wav()
        AudioSegment.from_file(input_path).export(wav_path, format="wav")
        if os.path.exists(wav_path) and os.path.getsize(wav_path) > MIN_WAV_SIZE:
            return wav_path
    except Exception:
        auto_err = traceback.format_exc()
    _remove_quietly(wav_path)

    # Attempt 2: small grid of explicit ffmpeg settings.
    diagnostics = []
    for fmt, sr, ch in FFMPEG_CANDIDATES:
        candidate = _new_temp_wav()
        success, debug = _ffmpeg_convert(input_path, candidate, fmt, sr, ch)
        diagnostics.append(
            f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n"
        )
        if success:
            print(
                f"DEBUG: ffmpeg fallback converted {input_path} "
                f"(fmt={fmt} sr={sr} ch={ch})",
                flush=True,
            )
            return candidate
        _remove_quietly(candidate)

    # Everything failed: record what is known about the input.
    diagnostics.extend(_probe_unconvertible_input(input_path))
    try:
        diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
        diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
        with open(diag_log, "w", encoding="utf-8") as fh:
            fh.write("pydub auto error:\n")
            fh.write(auto_err + "\n\n")
            fh.write("Full diagnostics:\n\n")
            fh.write("\n\n".join(diagnostics))
    except Exception as e:
        raise Exception(
            f"Conversion failed; diagnostics write error: {e}"
        ) from e

    raise Exception(
        f"Could not convert file to WAV. Diagnostics saved to: {diag_log}"
    )
947
+ Replace the ZIP extraction block inside transcribe_multiple with this improved version
948
+ Find this earlier in transcribe_multiple:
949
+
950
+ if zip_file:
951
+ log.append(f"Processing zip: {zip_file}")
952
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
953
+ try:
954
+ os.makedirs(temp_extract_dir, exist_ok=True)
955
+ with pyzipper.ZipFile(zip_file, "r") as zf:
956
+ if zip_password:
957
+ try:
958
+ zf.setpassword(zip_password.encode())
959
+ except Exception:
960
+ log.append("Incorrect zip password")
961
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
962
+ return
963
+ exts = [...]
964
+ ...
965
+ Replace the whole if zip_file: block with this:
966
+
967
+ if zip_file:
968
+ log.append(f"Processing zip: {zip_file}")
969
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
970
+ try:
971
+ os.makedirs(temp_extract_dir, exist_ok=True)
972
+ with pyzipper.ZipFile(zip_file, "r") as zf:
973
+ # set password if provided (pyzipper accepts bytes)
974
+ if zip_password:
975
+ try:
976
+ zf.setpassword(zip_password.encode())
977
+ except Exception:
978
+ # setpassword typically doesn't fail, but just in case
979
+ log.append("Failed to set zip password (unexpected).")
980
+ exts = [
981
+ ".mp3",
982
+ ".wav",
983
+ ".aac",
984
+ ".flac",
985
+ ".ogg",
986
+ ".m4a",
987
+ ".dat",
988
+ ".dct",
989
+ ]
990
+ count = 0
991
+ for info in zf.infolist():
992
+ if info.is_dir():
993
+ continue
994
+ _, ext = os.path.splitext(info.filename)
995
+ if ext.lower() in exts:
996
+ try:
997
+ # attempt to extract each file; if wrong password, extraction will raise
998
+ zf.extract(info, path=temp_extract_dir)
999
+ except RuntimeError as e:
1000
+ # pyzipper may raise RuntimeError on bad password
1001
+ log.append(f"Password required or incorrect for {info.filename}: {e}")
1002
+ continue
1003
+ except pyzipper.BadZipFile:
1004
+ log.append(f"Bad zip entry: {info.filename}")
1005
+ continue
1006
+ except Exception as e:
1007
+ log.append(f"Error extracting {info.filename}: {e}")
1008
+ continue
1009
+ p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
1010
+ if os.path.exists(p):
1011
+ extracted_audio_paths.append(p)
1012
+ count += 1
1013
+ log.append(f"Extracted: {info.filename}")
1014
+ if count == 0:
1015
+ log.append("No supported audio in zip.")
1016
+ try:
1017
+ shutil.rmtree(temp_extract_dir)
1018
+ except Exception:
1019
+ pass
1020
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
1021
+ return
1022
+ except pyzipper.BadZipFile:
1023
+ log.append("Invalid zip file.")
1024
+ try:
1025
+ shutil.rmtree(temp_extract_dir)
1026
+ except Exception:
1027
+ pass
1028
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
1029
+ return
1030
+ except Exception as e:
1031
+ log.append(f"Zip processing error: {e}")
1032
+ try:
1033
+ shutil.rmtree(temp_extract_dir)
1034
+ except Exception:
1035
+ pass
1036
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
1037
+ return
1038
+ Small hardening for temporary WAV deletion (replace your existing finally block)
1039
+ Replace the finally that deletes intermediate WAVs with this safer variant:
1040
+
1041
+ finally:
1042
+ try:
1043
+ if wav and os.path.exists(wav):
1044
+ tmpdir = tempfile.gettempdir()
1045
+ try:
1046
+ # Do not remove original .wav files that came from user (only remove conversion outputs)
1047
+ common = os.path.commonpath([os.path.abspath(tmpdir), os.path.abspath(wav)])
1048
+ if common == os.path.abspath(tmpdir) and not p.lower().endswith(".wav"):
1049
+ os.unlink(wav)
1050
+ except Exception:
1051
+ # fallback: only remove if it's in tempdir substring
1052
+ try:
1053
+ if tmpdir in os.path.abspath(wav) and not p.lower().endswith(".wav"):
1054
+ os.unlink(wav)
1055
+ except Exception:
1056
+ pass
1057
+ except Exception:
1058
+ pass
1059
+ Optional suggestions (non-blocking)
1060
+ Consider making MIN_WAV_SIZE a bit larger (e.g., 1024 bytes) — a 200-byte file is tiny and may well be a partial or invalid export.
1061
+
1062
+ Consider exposing a device option for the Whisper model (e.g. cpu vs cuda) and passing it into whisper.load_model.
1063
+
1064
+ default_zip_password is in your UI — don't keep real secrets there; make default generic or blank.
1065
+
1066
+ If you want more precise progress reporting, you could measure the duration of the actual model transcribe calls and update the percentage accordingly.
1067
+
1068
+ If you run on Hugging Face Spaces, ensure the whisper package you installed matches your API expectations (some forks differ).
1069
+
1070
+ Why these changes help (TL;DR)
1071
+ Letting ffmpeg autodetect input prevents many conversion failures on real-world audio files (mp3, m4a, flac, etc.).
1072
+
1073
+ Proper diagnostic capture + safer temp-file cleanup prevents stuck files and makes debugging easier.
1074
+
1075
+ More robust zip/password extraction reduces silent failures and makes user feedback clearer.
1076
+
1077
+ If you want, I can:
1078
+
1079
+ produce a full patched app.py with the above replacements merged (ready to paste), or
1080
+
1081
+ convert model loading to allow mobile/CPU-only fallback and add a small "device" dropdown in the UI, or
1082
+
1083
+ update the UI to add a "select which extracted files to transcribe" step (you had requested that in earlier project notes).
1084
+
1085
+ Tell me which of those you want and I’ll give the full code in one paste.
1086
+
1087
+ You said:
1088
+ full code with an option to memorize old words
1089
+ ChatGPT said:
1090
+
1091
+ Thought for 5s
1092
+ Nice — below is a full, ready-to-paste app.py that:
1093
+
1094
+ Includes the improved ffmpeg fallback conversion and safer temp cleanup from my earlier suggestions.
1095
+
1096
+ Improves ZIP extraction & password handling.
1097
+
1098
+ Adds Memory management UI and functions so you can:
1099
+
1100
+ Enable/disable the correction memory (existing checkbox).
1101
+
1102
+ Import an existing memory file (JSON or newline word list).
1103
+
1104
+ Add a single word or phrase to memory manually.
1105
+
1106
+ Clear the memory.
1107
+
1108
+ View current memory (words & phrases counts).
1109
+
1110
+ Keeps the original transcription flow and UI layout; I only extended the left-side controls to include the memory operations.
1111
+
1112
+ Paste this into your repo (replace your current app.py). I preserved your original structure and most variable names.
1113
+
1114
  # app.py
1115
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
1116
  # Clean, single-version file for Hugging Face Spaces.
 
2047
  print("FATAL: demo.launch failed:", e, flush=True)
2048
  traceback.print_exc()
2049
  raise
2050
+ Notes & usage tips
2051
+ Importable memory formats:
2052
+
2053
+ JSON with { "words": {"word": count, ...}, "phrases": {"some phrase": count, ...} }
2054
+
2055
+ Plain text with one word per line (optionally written as word,count); longer lines are treated as phrases.
2056
+
2057
+ The "Enable correction memory" checkbox must be checked for memory-based corrections to be applied during transcription.
2058
+
2059
+ View Memory shows the top 50 words/phrases (truncated for UI).
2060
+
2061
+ Clear Memory removes all stored words & phrases (and overwrites memory.json).
2062
+
2063
+ If you'd like I can:
2064
+
2065
+ produce a version that also shows a small popup listing extracted files from a ZIP and lets the user choose which to transcribe (you mentioned that previously), or
2066
+
2067
+ add a "device" dropdown (cpu/cuda) and pass to whisper.load_model, or
2068
+
2069
+ create a minimal tests script that runs conversion on a few sample audio files to validate ffmpeg fallbacks.
2070
+
2071
+ Which follow-up would you like?
2072
+
2073
+
2074
+
2075
+
2076
+ No file chosenNo file chosen
2077
+ ChatGPT can make mistakes. Check important info. See Cookie Preferences.