staraks committed on
Commit
a086336
·
verified ·
1 Parent(s): f461a9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -397
app.py CHANGED
@@ -1,415 +1,163 @@
1
- # app.py
2
- # Full Whisper transcription app for Hugging Face Spaces
3
- # - Advanced .dct conversion (ffmpeg heuristics + pydub)
4
- # - Zip extraction (pyzipper)
5
- # - Whisper transcription (cached)
6
- # - Live progress & logs to Gradio (generator)
7
- # - Persistent memory (word + phrase) with fuzzy correction
8
- # - Simple medical post-processing (abbrev expansion)
9
- # - Merge transcripts to .docx
10
- # - Binds to 0.0.0.0:$PORT and uses demo.queue().launch()
11
-
12
- import os
13
- import json
14
- import shutil
15
- import tempfile
16
- import subprocess
17
- import traceback
18
- import threading
19
- import re
20
- from difflib import get_close_matches
21
- from pathlib import Path
22
-
23
- from docx import Document
24
- import whisper
25
- import gradio as gr
26
- import pyzipper
27
- from pydub import AudioSegment
28
-
29
- # ---------- Config ----------
30
- MEMORY_FILE = "memory.json" # persistent memory in repo (will be written)
31
- MEMORY_LOCK = threading.Lock()
32
- DIAGNOSTICS_DIR_BASE = tempfile.gettempdir()
33
- MIN_WAV_SIZE = 200
34
- # ----------------------------
35
-
36
- # ensure memory file exists
37
- def load_memory():
38
  try:
39
- if os.path.exists(MEMORY_FILE):
40
- with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
41
- return json.load(fh)
42
- except Exception:
43
- pass
44
- # default structure
45
- mem = {"words": {}, "phrases": {}}
46
- try:
47
- with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
48
- json.dump(mem, fh, ensure_ascii=False, indent=2)
49
- except Exception:
50
- pass
51
- return mem
52
-
53
def save_memory(mem):
    """Persist *mem* to MEMORY_FILE as pretty-printed JSON."""
    # Hold the lock for the whole write so concurrent updates don't interleave.
    with MEMORY_LOCK, open(MEMORY_FILE, "w", encoding="utf-8") as fh:
        json.dump(mem, fh, ensure_ascii=False, indent=2)
57
-
58
- memory = load_memory()
59
-
60
# ---------- Simple medical post-processing ----------
# Common clinical shorthand expanded to full words during post-processing.
MEDICAL_ABBREVIATIONS = {
    "pt": "patient",
    "dx": "diagnosis",
    "hx": "history",
    "sx": "symptoms",
    "c/o": "complains of",
    "bp": "blood pressure",
    "hr": "heart rate",
    "o2": "oxygen",
    "r/o": "rule out",
    "adm": "admit",
    "disch": "discharge",
    # extend as needed
}

# Lower-case drug names mapped to their conventional capitalization.
DRUG_NORMALIZATION = {
    "metformin": "Metformin",
    "aspirin": "Aspirin",
    "amoxicillin": "Amoxicillin",
}

def expand_abbreviations(text):
    """Replace known medical abbreviations with their full expansions.

    Whitespace between tokens is preserved exactly, and trailing punctuation
    on an abbreviated token (e.g. "pt.") is kept after the expansion.
    """
    pieces = re.split(r'(\s+)', text)  # keep whitespace runs as list items
    expanded = []
    for piece in pieces:
        key = piece.lower().strip(".,;:")
        if key not in MEDICAL_ABBREVIATIONS:
            expanded.append(piece)
            continue
        # Carry over any trailing punctuation from the original token.
        tail = ''
        match = re.match(r'([A-Za-z0-9/]+)([.,;:]*)', piece)
        if match:
            tail = match.group(2) or ''
        expanded.append(MEDICAL_ABBREVIATIONS[key] + tail)
    return ''.join(expanded)

def normalize_drugs(text):
    """Normalize capitalization of known drug names (case-insensitive match)."""
    for name, canonical in DRUG_NORMALIZATION.items():
        text = re.sub(rf'\b{name}\b', canonical, text, flags=re.IGNORECASE)
    return text
101
-
102
def punctuation_and_capitalization(text):
    """Ensure *text* ends with terminal punctuation and each sentence starts
    with an uppercase letter.

    Only the first character of each sentence is changed: the original used
    str.capitalize(), which lowercases the REST of the sentence and thereby
    undid normalize_drugs() ("Metformin" -> "metformin") and mangled acronyms.
    """
    text = text.strip()
    if not text:
        return text
    # Guarantee a terminal '.', '?' or '!'.
    if not re.search(r'[.?!]\s*$', text):
        text = text.rstrip() + '.'
    # The capturing group keeps sentence separators as their own list items.
    parts = re.split(r'([.?!]\s+)', text)
    out = []
    for p in parts:
        if p and not re.match(r'[.?!]\s+', p):
            # Uppercase only the first character; leave the rest untouched.
            out.append(p[0].upper() + p[1:])
        else:
            out.append(p)
    return ''.join(out)
116
-
117
def postprocess_transcript(text, format_soap=False):
    """Clean up a raw transcript: collapse whitespace, expand medical
    abbreviations, normalize drug names, and fix punctuation/capitalization.

    When *format_soap* is True, the cleaned text is reshaped into a rough
    SOAP note (Subjective/Objective/Assessment/Plan) instead of plain prose.
    """
    if not text:
        return text
    cleaned = re.sub(r'\s+', ' ', text).strip()
    cleaned = expand_abbreviations(cleaned)
    cleaned = normalize_drugs(cleaned)
    cleaned = punctuation_and_capitalization(cleaned)
    if not format_soap:
        return cleaned
    # Crude SOAP layout: first sentence -> S, second -> O; A is filled in
    # only when a diagnostic keyword appears anywhere in the text.
    sentences = re.split(r'(?<=[.?!])\s+', cleaned)
    subjective = sentences[0] if sentences else ""
    objective = sentences[1] if len(sentences) > 1 else ""
    assessment = ""
    lowered = cleaned.lower()
    if any(kw in lowered for kw in ("diagnosis", "dx", "rule out", "r/o", "probable")):
        assessment = "Assessment: " + subjective
    return f"S: {subjective}\nO: {objective}\nA: {assessment}\nP: Plan: follow up as indicated."
136
-
137
# ---------- Memory utilities (word + phrase) ----------
def extract_words_and_phrases(text):
    """Return (words, sentences) extracted from *text*.

    Words are alphanumeric runs (hyphens/apostrophes allowed); sentences come
    from splitting on terminal punctuation followed by whitespace.
    """
    tokens = re.findall(r"[A-Za-z0-9\-']+", text)
    phrases = [part.strip() for part in re.split(r'(?<=[.?!])\s+', text) if part.strip()]
    return [tok for tok in tokens if tok.strip()], phrases
143
-
144
def update_memory_with_transcript(transcript):
    """Fold a transcript's words and sentences into persistent memory.

    Increments the per-word and per-phrase counters and persists the result.
    Bug fixed: previously `changed` was set only when a NEW key was added, so
    increments to already-known words/phrases were never written to disk; any
    extracted content now triggers a save.
    """
    global memory
    words, sentences = extract_words_and_phrases(transcript)
    changed = False
    with MEMORY_LOCK:
        for w in words:
            lw = w.lower()
            memory["words"][lw] = memory["words"].get(lw, 0) + 1
            changed = True
        for s in sentences:
            key = s.strip()
            memory["phrases"][key] = memory["phrases"].get(key, 0) + 1
            changed = True
        if changed:
            # Write directly instead of calling save_memory(): MEMORY_LOCK is
            # a plain (non-reentrant) threading.Lock and is already held here.
            try:
                with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
                    json.dump(memory, fh, ensure_ascii=False, indent=2)
            except Exception:
                # Best effort: failure just means memory won't persist.
                pass
169
-
170
def memory_correct_text(text, min_ratio=0.85):
    """
    Correct words/phrases in text using memory.

    - Word-level: fuzzy-matches each word against known memory words via
      difflib.get_close_matches (cutoff = min_ratio).
    - Phrase-level: re-applies the stored casing of known phrases that occur
      (case-insensitively) in the corrected text.
    """
    if not text or (not memory.get("words") and not memory.get("phrases")):
        return text

    known_words = memory["words"]

    def fix_word(word):
        lowered = word.lower()
        if lowered in known_words:
            return word  # exact hit, keep as-is
        match = get_close_matches(lowered, known_words.keys(), n=1, cutoff=min_ratio)
        if not match:
            return word
        candidate = match[0]
        # Roughly preserve the original casing of the input word.
        return candidate.capitalize() if word[0].isupper() else candidate

    # Split keeps punctuation/whitespace pieces so they survive the rebuild.
    word_shape = re.compile(r"^[A-Za-z0-9\-']+$")
    corrected = ''.join(
        fix_word(tok) if word_shape.match(tok) else tok
        for tok in re.split(r'(\W+)', text)
    )

    # Phrase-level pass: longest stored phrases first; skip short phrases
    # (< 8 chars) to avoid noisy matches.
    for phrase in sorted(memory.get("phrases", {}), key=len, reverse=True):
        if len(phrase) < 8:
            continue
        if phrase.lower() in corrected.lower():
            # Re-impose the stored phrase's casing at the matched location.
            corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
    return corrected
214
-
215
# ---------- File utilities ----------
def save_as_word(text, filename=None):
    """Write *text* into a single-paragraph .docx file and return its path.

    When *filename* is omitted, a default path inside the system temp
    directory is used.
    """
    target = filename
    if target is None:
        target = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
    document = Document()
    document.add_paragraph(text)
    document.save(target)
    return target
223
-
224
- # ---------- Advanced conversion: pydub auto + ffmpeg heuristics ----------
225
- def convert_to_wav_if_needed(input_path):
226
- """
227
- Advanced conversion:
228
- - pydub (AudioSegment.from_file) first
229
- - if that fails, exhaustive ffmpeg format/rate/channel grid
230
- - writes diagnostics to a temp folder if conversion fails entirely
231
- """
232
- input_path = str(input_path)
233
- lower = input_path.lower()
234
- if lower.endswith(".wav"):
235
- return input_path
236
 
237
- # try pydub first
238
- auto_err = ""
239
- try:
240
- tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
241
- tmp.close()
242
- AudioSegment.from_file(input_path).export(tmp.name, format="wav")
243
- return tmp.name
244
- except Exception as e:
245
- auto_err = traceback.format_exc()
246
  try:
247
- os.unlink(tmp.name)
248
- except Exception:
249
- pass
250
-
251
- # fallback grid
252
- pcm_formats = ['s16le', 's32le', 's24le', 's8', 'u8', 's16be', 'pcm_s16le', 'pcm_u8', 'pcm_u16le']
253
- mulaw_alaw = ['mulaw', 'alaw']
254
- adpcm = ['adpcm_ima_wav', 'adpcm_ms']
255
- extra = ['gsm', 'g726', 'vorbis']
256
- formats = pcm_formats + mulaw_alaw + adpcm + extra
257
- sample_rates = [8000, 11025, 12000, 16000, 22050, 32000, 44100, 48000]
258
- channels = [1, 2]
259
-
260
- diagnostics = []
261
- diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
262
- diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
263
-
264
- for fmt in formats:
265
- for sr in sample_rates:
266
- for ch in channels:
267
- out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
268
- out_wav.close()
269
- cmd = [
270
- "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
271
- "-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_wav.name
272
- ]
273
  try:
274
- proc = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
275
- except Exception as e_run:
276
- diagnostics.append(f"RUN-EXC fmt={fmt} sr={sr} ch={ch} err={e_run}")
277
- try: os.unlink(out_wav.name)
278
- except Exception: pass
279
- continue
280
-
281
- rc = proc.returncode
282
- stderr = proc.stderr.strip() if proc.stderr else ""
283
- stdout = proc.stdout.strip() if proc.stdout else ""
284
- diagnostics.append(f"ATTEMPT fmt={fmt} sr={sr} ch={ch} rc={rc}")
285
- if stdout:
286
- diagnostics.append("STDOUT:")
287
- diagnostics.append(stdout)
288
- if stderr:
289
- diagnostics.append("STDERR:")
290
- diagnostics.append(stderr)
291
- diagnostics.append("-" * 60)
292
-
293
- try:
294
- if rc == 0 and os.path.exists(out_wav.name) and os.path.getsize(out_wav.name) > MIN_WAV_SIZE:
295
- # success
296
- try:
297
- with open(diag_log, "w", encoding="utf-8") as fh:
298
- fh.write("pydub auto error:\n")
299
- fh.write(auto_err + "\n\n")
300
- fh.write("Successful guess:\n")
301
- fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
302
- fh.write("Diagnostics (last attempts):\n")
303
- fh.write("\n".join(diagnostics[-1000:]))
304
- except Exception:
305
- pass
306
- return out_wav.name
307
  except Exception:
308
  pass
309
 
310
- try: os.unlink(out_wav.name)
311
- except Exception: pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- # ffprobe and hexdump preview
314
- try:
315
- fp = subprocess.run(["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
316
- capture_output=True, text=True, timeout=15)
317
- diagnostics.append("FFPROBE:")
318
- diagnostics.append(fp.stdout.strip() or fp.stderr.strip())
319
- except Exception as e:
320
- diagnostics.append(f"ffprobe failed: {e}")
 
321
 
322
- try:
323
- with open(input_path, "rb") as fh:
324
- head = fh.read(256)
325
- diagnostics.append("HEX PREVIEW:")
326
- diagnostics.append(head.hex())
327
- except Exception as e:
328
- diagnostics.append(f"could not read head: {e}")
329
 
 
330
  try:
331
- with open(diag_log, "w", encoding="utf-8") as fh:
332
- fh.write("pydub auto error:\n")
333
- fh.write(auto_err + "\n\n")
334
- fh.write("Full diagnostics:\n\n")
335
- fh.write("\n".join(diagnostics))
336
- except Exception as e:
337
- raise Exception(f"Conversion failed; diagnostics could not be written: {e}")
338
-
339
- raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}\nSummary: {diagnostics[:6]}")
340
-
341
# ---------- Whisper model cache ----------
# Loaded Whisper models keyed by model name, so repeated requests reuse them.
MODEL_CACHE = {}

def get_whisper_model(name):
    """Return the Whisper model *name*, loading and caching it on first use."""
    try:
        return MODEL_CACHE[name]
    except KeyError:
        model = whisper.load_model(name)
        MODEL_CACHE[name] = model
        return model
348
 
349
- # ---------- Main transcription generator ----------
350
- def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbox, zip_file=None, zip_password=None, enable_memory=False):
351
  """
352
- Generator yields (log_text, transcripts_text, word_file_path_or_None, percent_int)
353
- audio_files: path or list of paths (gr.File with type='filepath' gives file path string)
354
  """
355
- log = []
356
- transcripts = []
357
- word_file_path = None
358
- temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
359
- extracted_audio_paths = []
360
-
361
- yield "", "", None, 0
362
-
363
- # cleanup
364
- if os.path.exists(temp_extract_dir):
365
- try:
366
- shutil.rmtree(temp_extract_dir)
367
- log.append(f"Cleaned previous temp dir: {temp_extract_dir}")
368
- except Exception:
369
- pass
370
-
371
- # handle zip
372
  if zip_file:
373
- log.append(f"Processing zip: {zip_file}")
374
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
375
- try:
376
- os.makedirs(temp_extract_dir, exist_ok=True)
377
- with pyzipper.ZipFile(zip_file, "r") as zf:
378
- if zip_password:
379
- try: zf.setpassword(zip_password.encode())
380
- except Exception:
381
- log.append("Incorrect zip password")
382
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
383
- return
384
- exts = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
385
- count = 0
386
- for info in zf.infolist():
387
- if info.is_dir(): continue
388
- _, ext = os.path.splitext(info.filename)
389
- if ext.lower() in exts:
390
- try:
391
- zf.extract(info, path=temp_extract_dir)
392
- p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
393
- if os.path.exists(p):
394
- extracted_audio_paths.append(p)
395
- count += 1
396
- log.append(f"Extracted: {info.filename}")
397
- except Exception as e:
398
- log.append(f"Error extracting {info.filename}: {e}")
399
- if count == 0:
400
- log.append("No supported audio in zip.")
401
- try: shutil.rmtree(temp_extract_dir)
402
- except Exception: pass
403
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
404
- return
405
- except pyzipper.BadZipFile:
406
- log.append("Invalid zip file.")
407
- try: shutil.rmtree(temp_extract_dir)
408
- except Exception: pass
409
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
410
- return
411
- except Exception as e:
412
- log.append(f"Zip processing error: {e}")
413
- try: shutil.rmtree(temp_extract_dir)
414
- except Exception: pass
415
- yield "\n\n".join(log), "\n\n".join(transcripts), None
 
1
+ # ----------------------- CONTINUATION / APP LAUNCH -----------------------
2
+ # Append this to the end of your app.py (after the previous code)
3
+
4
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
5
+ return
6
+
7
+ # collect audio file paths from either audio_files or extracted paths
8
+ paths = []
9
+ if extracted_audio_paths:
10
+ paths.extend(extracted_audio_paths)
11
+ if audio_files:
12
+ # audio_files may be a single path string or list of paths (gr.File gives str)
13
+ if isinstance(audio_files, (list, tuple)):
14
+ for a in audio_files:
15
+ if a:
16
+ paths.append(a)
17
+ elif isinstance(audio_files, str):
18
+ paths.append(audio_files)
19
+
20
+ if not paths:
21
+ log.append("No audio files provided.")
22
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
23
+ return
24
+
25
+ # load model
26
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
 
 
 
 
 
 
 
 
 
 
 
27
  try:
28
+ model = get_whisper_model(model_name)
29
+ log.append(f"Loaded Whisper model: {model_name}")
30
+ except Exception as e:
31
+ log.append(f"Failed to load model {model_name}: {e}")
32
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
33
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
+ total = len(paths)
36
+ idx = 0
37
+ for p in paths:
38
+ idx += 1
39
+ log.append(f"Processing file ({idx}/{total}): {p}")
40
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx-1) * 80 / max(1, total))
 
 
41
 
42
+ try:
43
+ wav = convert_to_wav_if_needed(p)
44
+ log.append(f"Converted to WAV: {wav}")
45
+ except Exception as e:
46
+ log.append(f"Conversion failed for {p}: {e}")
47
+ transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}")
48
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
49
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # whisper transcription
 
 
 
 
 
 
 
 
52
  try:
53
+ # advanced_options can be used to pass whisper params later
54
+ whisper_opts = {}
55
+ # e.g., advanced_options might contain 'language' or 'task'; keep simple for now
56
+ if isinstance(advanced_options, dict):
57
+ whisper_opts.update(advanced_options)
58
+
59
+ result = model.transcribe(wav, **whisper_opts)
60
+ text = result.get("text", "").strip()
61
+ log.append(f"Transcribed: {len(text)} chars")
62
+ # memory correction & postprocess
63
+ if enable_memory:
64
+ text = memory_correct_text(text)
65
+ text = postprocess_transcript(text)
66
+ transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
67
+ # update persistent memory
68
+ if enable_memory:
 
 
 
 
 
 
 
 
 
 
69
  try:
70
+ update_memory_with_transcript(text)
71
+ log.append("Memory updated.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  except Exception:
73
  pass
74
 
75
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
76
+ except Exception as e:
77
+ log.append(f"Transcription failed for {p}: {e}")
78
+ transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}")
79
+ yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
80
+ continue
81
+ finally:
82
+ # cleanup intermediate wav if it was created in tempdir and not original .wav
83
+ try:
84
+ if wav and os.path.exists(wav) and (not p.lower().endswith(".wav")):
85
+ # only remove if it's a tmp file in tmpdir
86
+ tmpdir = tempfile.gettempdir()
87
+ if os.path.commonpath([tmpdir, os.path.abspath(wav)]) == tmpdir:
88
+ os.unlink(wav)
89
+ except Exception:
90
+ pass
91
 
92
+ # final merge option
93
+ if merge_checkbox:
94
+ try:
95
+ merged_text = "\n\n".join(transcripts)
96
+ word_file_path = save_as_word(merged_text)
97
+ log.append(f"Merged transcript saved: {word_file_path}")
98
+ except Exception as e:
99
+ log.append(f"Failed to save merged file: {e}")
100
+ word_file_path = None
101
 
102
+ # final yield
103
+ yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
 
 
 
 
 
104
 
105
+ # cleanup extracted dir
106
  try:
107
+ if os.path.exists(temp_extract_dir):
108
+ shutil.rmtree(temp_extract_dir)
109
+ log.append("Cleaned temporary extraction dir.")
110
+ except Exception:
111
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
# ----------------------- Gradio UI -----------------------
def run_transcription_wrapper(files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options):
    """
    Gradio wrapper: accepts file upload(s) and an optional zip, and returns
    the transcribe_multiple generator (Gradio streams generator output).
    """
    zip_path = None
    if zip_file:
        # Depending on the Gradio version, gr.File yields a path string, an
        # object exposing .name, or a dict — handle all three shapes.
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = str(zip_file)
        elif hasattr(zip_file, "name"):
            zip_path = zip_file.name
        elif isinstance(zip_file, dict) and zip_file.get("name"):
            zip_path = zip_file["name"]
    # Advanced options are not used heavily yet; default to an empty dict.
    return transcribe_multiple(
        files,
        model_name,
        advanced_options or {},
        merge_checkbox=merge,
        zip_file=zip_path,
        zip_password=zip_password,
        enable_memory=enable_memory,
    )
133
+
134
# Build Blocks UI
demo = gr.Blocks()

with demo:
    gr.Markdown("## Whisper Transcription (Spaces-ready)")
    with gr.Row():
        with gr.Column(scale=2):
            file_input = gr.File(label="Upload audio files (or zip)", file_count="multiple", type="filepath")
            zip_input = gr.File(label="Optional: Upload zip file containing audio", file_count="single", type="filepath")
            zip_password = gr.Textbox(label="Zip password (if any)", placeholder="password (optional)")
            model_select = gr.Dropdown(choices=["small","medium","large","base"], value="small", label="Whisper model")
            merge_checkbox = gr.Checkbox(label="Merge transcripts to a single .docx (downloadable)", value=True)
            memory_checkbox = gr.Checkbox(label="Enable persistent memory (word/phrase correction)", value=False)
            submit = gr.Button("Transcribe")
        with gr.Column(scale=3):
            logs = gr.Textbox(label="Logs (streaming)", lines=12)
            transcripts_out = gr.Textbox(label="Transcripts (streaming)", lines=12)
            download_file = gr.File(label="Merged .docx (when enabled)")
            # Bug fix: the transcription generator yields FOUR values
            # (logs, transcripts, file, progress) but only three outputs were
            # wired up, which makes Gradio fail on the output-count mismatch.
            # Add a progress component as the fourth output.
            progress_out = gr.Number(label="Progress (%)")

    # connect
    # Gradio supports generator functions directly; the outputs are
    # (logs, transcripts, file, progress).
    submit.click(fn=run_transcription_wrapper,
                 inputs=[file_input, model_select, merge_checkbox, zip_input, zip_password, memory_checkbox, gr.State({})],
                 outputs=[logs, transcripts_out, download_file, progress_out])

# Ensure we queue and bind to PORT (for Spaces/containers)
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    # Use queue to support longer-running jobs
    demo.queue().launch(server_name="0.0.0.0", server_port=port)