staraks committed on
Commit
fc6402d
·
verified ·
1 Parent(s): b7cd478

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -277
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py
2
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
3
- # Clean, single-version file for Hugging Face Spaces.
4
 
5
  import os
6
  import sys
@@ -13,12 +13,12 @@ import threading
13
  import re
14
  from difflib import get_close_matches
15
 
16
- # Force unbuffered output so container logs show prints immediately
17
  os.environ["PYTHONUNBUFFERED"] = "1"
18
 
19
  print("DEBUG: app.py bootstrap starting", flush=True)
20
 
21
- # Third-party imports (must be installed in the environment)
22
  try:
23
  from docx import Document
24
  import whisper
@@ -35,7 +35,7 @@ print("DEBUG: imports OK", flush=True)
35
  # ---------- Config ----------
36
  MEMORY_FILE = "memory.json"
37
  MEMORY_LOCK = threading.Lock()
38
- MIN_WAV_SIZE = 1024 # bytes
39
  FFMPEG_CANDIDATES = [
40
  ("s16le", 16000, 1),
41
  ("s16le", 44100, 2),
@@ -77,13 +77,9 @@ def save_memory(mem):
77
 
78
 
79
  memory = load_memory()
80
- print(
81
- "DEBUG: memory loaded (words=%d phrases=%d)"
82
- % (len(memory.get("words", {})), len(memory.get("phrases", {}))),
83
- flush=True,
84
- )
85
 
86
- # ---------- Postprocessing ----------
87
  MEDICAL_ABBREVIATIONS = {
88
  "pt": "patient",
89
  "dx": "diagnosis",
@@ -159,14 +155,12 @@ def postprocess_transcript(text, format_soap=False):
159
  if kw in t.lower():
160
  assessment = "Assessment: " + subj
161
  break
162
- soap = (
163
- f"S: {subj}\nO: {obj}\nA: {assessment}\nP: Plan: follow up as indicated."
164
- )
165
  return soap
166
  return t
167
 
168
 
169
- # ---------- Memory utilities ----------
170
  def extract_words_and_phrases(text):
171
  words = re.findall(r"[A-Za-z0-9\-']+", text)
172
  sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
@@ -234,7 +228,7 @@ def memory_correct_text(text, min_ratio=0.85):
234
  return corrected
235
 
236
 
237
- # ---------- Memory management UI helpers ----------
238
  def import_memory_file(uploaded):
239
  global memory
240
  if not uploaded:
@@ -319,7 +313,7 @@ def clear_memory():
319
  return "Memory cleared."
320
 
321
 
322
- def view_memory(limit=2000):
323
  w = memory.get("words", {})
324
  p = memory.get("phrases", {})
325
  out_lines = []
@@ -461,136 +455,105 @@ def convert_to_wav_if_needed(input_path):
461
  MODEL_CACHE = {}
462
 
463
 
464
- def get_whisper_model(name):
465
  if name not in MODEL_CACHE:
466
  print(f"DEBUG: loading whisper model '{name}'", flush=True)
467
- MODEL_CACHE[name] = whisper.load_model(name)
 
 
 
468
  return MODEL_CACHE[name]
469
 
470
 
471
- # ---------- Main transcription generator ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  def transcribe_multiple(
473
- audio_files,
474
  model_name,
475
  advanced_options,
476
  merge_checkbox,
477
- zip_file=None,
478
- zip_password=None,
479
  enable_memory=False,
 
480
  ):
 
 
 
 
481
  log = []
482
  transcripts = []
483
  word_file_path = None
484
- temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
485
- extracted_audio_paths = []
486
-
487
- yield "", "", None, 0
488
-
489
- if os.path.exists(temp_extract_dir):
490
- try:
491
- shutil.rmtree(temp_extract_dir)
492
- log.append(f"Cleaned previous temp dir: {temp_extract_dir}")
493
- except Exception:
494
- pass
495
 
496
- if zip_file:
497
- log.append(f"Processing zip: {zip_file}")
498
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
499
- try:
500
- os.makedirs(temp_extract_dir, exist_ok=True)
501
- with pyzipper.ZipFile(zip_file, "r") as zf:
502
- if zip_password:
503
- try:
504
- zf.setpassword(zip_password.encode())
505
- except Exception:
506
- log.append("Failed to set zip password (unexpected).")
507
-
508
- exts = [
509
- ".mp3",
510
- ".wav",
511
- ".aac",
512
- ".flac",
513
- ".ogg",
514
- ".m4a",
515
- ".dat",
516
- ".dct",
517
- ]
518
- count = 0
519
- for info in zf.infolist():
520
- if info.is_dir():
521
- continue
522
- _, ext = os.path.splitext(info.filename)
523
- if ext.lower() in exts:
524
- try:
525
- zf.extract(info, path=temp_extract_dir)
526
- except RuntimeError as e:
527
- log.append(f"Password required or incorrect for {info.filename}: {e}")
528
- continue
529
- except pyzipper.BadZipFile:
530
- log.append(f"Bad zip entry: {info.filename}")
531
- continue
532
- except Exception as e:
533
- log.append(f"Error extracting {info.filename}: {e}")
534
- continue
535
- p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
536
- if os.path.exists(p):
537
- extracted_audio_paths.append(p)
538
- count += 1
539
- log.append(f"Extracted: {info.filename}")
540
- if count == 0:
541
- log.append("No supported audio in zip.")
542
- try:
543
- shutil.rmtree(temp_extract_dir)
544
- except Exception:
545
- pass
546
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
547
- return
548
- except pyzipper.BadZipFile:
549
- log.append("Invalid zip file.")
550
- try:
551
- shutil.rmtree(temp_extract_dir)
552
- except Exception:
553
- pass
554
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
555
- return
556
- except Exception as e:
557
- log.append(f"Zip processing error: {e}")
558
- try:
559
- shutil.rmtree(temp_extract_dir)
560
- except Exception:
561
- pass
562
- yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
563
- return
564
-
565
- paths = []
566
- if extracted_audio_paths:
567
- paths.extend(extracted_audio_paths)
568
- if audio_files:
569
- if isinstance(audio_files, (list, tuple)):
570
- for a in audio_files:
571
- if a:
572
- paths.append(a)
573
- elif isinstance(audio_files, str):
574
- paths.append(audio_files)
575
-
576
- if not paths:
577
  log.append("No audio files provided.")
578
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
579
  return
580
 
 
 
 
581
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
582
  try:
583
- model = get_whisper_model(model_name)
584
  log.append(f"Loaded Whisper model: {model_name}")
585
  except Exception as e:
586
  log.append(f"Failed to load model {model_name}: {e}")
587
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
588
  return
589
 
590
- total = len(paths)
591
- idx = 0
592
- for p in paths:
593
- idx += 1
594
  log.append(f"Processing file ({idx}/{total}): {p}")
595
  yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx - 1) * 80 / max(1, total))
596
 
@@ -659,188 +622,213 @@ def transcribe_multiple(
659
 
660
  yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
661
 
662
- try:
663
- if os.path.exists(temp_extract_dir):
664
- shutil.rmtree(temp_extract_dir)
665
- log.append("Cleaned temporary extraction dir.")
666
- except Exception:
667
- pass
668
-
669
-
670
- # ----------------------- Gradio wrapper (streaming) -----------------------
671
- def run_transcription_wrapper(
672
- files,
673
- model_name,
674
- merge,
675
- zip_file,
676
- zip_password,
677
- use_default_zip_pass,
678
- default_zip_password,
679
- enable_memory,
680
- advanced_options_state,
681
- ):
682
- try:
683
- audio_input = files
684
 
685
- zip_path = None
686
- if zip_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  if isinstance(zip_file, (str, os.PathLike)):
688
  zip_path = str(zip_file)
689
  elif hasattr(zip_file, "name"):
690
  zip_path = zip_file.name
691
  elif isinstance(zip_file, dict) and zip_file.get("name"):
692
  zip_path = zip_file["name"]
 
 
 
693
 
694
- if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
695
- final_zip_password = default_zip_password
696
- else:
697
- final_zip_password = zip_password
698
-
699
- adv = {}
700
 
701
- for logs, transcripts, word_path, percent in transcribe_multiple(
702
- audio_input,
 
703
  model_name,
704
  adv,
705
  merge_checkbox=merge,
706
- zip_file=zip_path,
707
- zip_password=final_zip_password,
708
  enable_memory=enable_memory,
 
709
  ):
710
- yield logs, transcripts, word_path, percent
711
-
712
  except Exception:
713
  tb = traceback.format_exc()
714
- logs = f"EXCEPTION in run_transcription_wrapper:\n{tb}"
715
- transcripts = "ERROR: transcription did not start or failed unexpectedly."
716
- yield logs, transcripts, None, 100
717
 
718
 
 
719
  print("DEBUG: building Gradio Blocks", flush=True)
720
-
721
- with gr.Blocks(title="Whisper Transcriber") as demo:
722
  gr.Markdown(
723
- "## Whisper Transcriber\n"
724
- "Upload audio files or a ZIP on the left and click **Transcribe**.\n"
725
- "Transcript, progress, download, and logs appear on the right."
726
  )
727
 
728
- with gr.Row():
729
- with gr.Column(scale=1):
730
- gr.Markdown("### Input")
731
-
732
- file_input = gr.File(
733
- label="Audio files",
734
- file_count="multiple",
735
- type="filepath",
736
- height=60,
737
- )
738
-
739
- zip_input = gr.File(
740
- label="ZIP with audio (optional)",
741
- file_count="single",
742
- type="filepath",
743
- height=60,
744
- )
745
-
746
- use_default_zip_pass = gr.Checkbox(
747
- label="Use default ZIP password",
748
- value=False,
749
- )
750
-
751
- default_zip_password = gr.Textbox(
752
- label="Default ZIP password",
753
- value="dietcoke1",
754
- interactive=True,
755
- )
756
-
757
- zip_password = gr.Textbox(
758
- label="ZIP password (override)",
759
- placeholder="If empty, default password will be used",
760
- )
761
-
762
- model_select = gr.Dropdown(
763
- choices=["small", "medium", "large", "base"],
764
- value="small",
765
- label="Whisper model",
766
- )
767
-
768
- merge_checkbox = gr.Checkbox(
769
- label="Merge all transcripts into one .docx",
770
- value=True,
771
- )
772
-
773
- memory_checkbox = gr.Checkbox(
774
- label="Enable correction memory (use during transcription)",
775
- value=False,
776
- )
777
-
778
- submit = gr.Button("Transcribe", variant="primary")
779
-
780
- gr.Markdown("### Memory management")
781
- mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="file")
782
- mem_import_btn = gr.Button("Import Memory File")
783
- mem_manual_entry = gr.Textbox(label="Add word/phrase to memory (manual)", placeholder="Type a word or phrase")
784
- mem_add_btn = gr.Button("Add to Memory")
785
- mem_clear_btn = gr.Button("Clear Memory")
786
- mem_view_btn = gr.Button("View Memory")
787
- mem_status = gr.Textbox(label="Memory status", interactive=False, lines=6)
788
-
789
- with gr.Column(scale=1):
790
- gr.Markdown("### Output")
791
-
792
- transcripts_out = gr.Textbox(
793
- label="Transcript",
794
- lines=18,
795
- interactive=False,
796
- )
797
-
798
- progress_num = gr.Slider(
799
- minimum=0,
800
- maximum=100,
801
- value=0,
802
- step=1,
803
- label="Progress (%)",
804
- interactive=False,
805
  )
806
 
807
- download_file = gr.File(
808
- label="Merged .docx (when available)"
809
- )
810
-
811
- logs = gr.Textbox(
812
- label="Logs",
813
- lines=10,
814
- interactive=False,
815
- )
816
-
817
- submit.click(
818
- fn=run_transcription_wrapper,
819
- inputs=[
820
- file_input,
821
- model_select,
822
- merge_checkbox,
823
- zip_input,
824
- zip_password,
825
- use_default_zip_pass,
826
- default_zip_password,
827
- memory_checkbox,
828
- gr.State({}),
829
- ],
830
- outputs=[logs, transcripts_out, download_file, progress_num],
831
- )
832
-
833
- def _import_memory(uploaded):
834
- return import_memory_file(uploaded)
835
-
836
- mem_import_btn.click(fn=_import_memory, inputs=[mem_upload], outputs=[mem_status])
837
-
838
- mem_add_btn.click(fn=add_memory_entry, inputs=[mem_manual_entry], outputs=[mem_status])
839
-
840
- mem_clear_btn.click(fn=lambda: clear_memory(), inputs=[], outputs=[mem_status])
841
-
842
- mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
843
-
 
 
 
844
  if __name__ == "__main__":
845
  port = int(os.environ.get("PORT", 7860))
846
  print("DEBUG: launching Gradio on port", port, flush=True)
 
1
  # app.py
2
  # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
3
+ # Multi-tab UI, zip extraction + selectable files, memory management
4
 
5
  import os
6
  import sys
 
13
  import re
14
  from difflib import get_close_matches
15
 
16
+ # Force unbuffered output
17
  os.environ["PYTHONUNBUFFERED"] = "1"
18
 
19
  print("DEBUG: app.py bootstrap starting", flush=True)
20
 
21
+ # Third-party imports
22
  try:
23
  from docx import Document
24
  import whisper
 
35
  # ---------- Config ----------
36
  MEMORY_FILE = "memory.json"
37
  MEMORY_LOCK = threading.Lock()
38
+ MIN_WAV_SIZE = 1024
39
  FFMPEG_CANDIDATES = [
40
  ("s16le", 16000, 1),
41
  ("s16le", 44100, 2),
 
77
 
78
 
79
  memory = load_memory()
80
+ print("DEBUG: memory loaded (words=%d phrases=%d)" % (len(memory.get("words", {})), len(memory.get("phrases", {}))), flush=True)
 
 
 
 
81
 
82
+ # ---------- Postprocessing (same as before) ----------
83
  MEDICAL_ABBREVIATIONS = {
84
  "pt": "patient",
85
  "dx": "diagnosis",
 
155
  if kw in t.lower():
156
  assessment = "Assessment: " + subj
157
  break
158
+ soap = f"S: {subj}\nO: {obj}\nA: {assessment}\nP: Plan: follow up as indicated."
 
 
159
  return soap
160
  return t
161
 
162
 
163
+ # ---------- Memory utilities (same as before) ----------
164
  def extract_words_and_phrases(text):
165
  words = re.findall(r"[A-Za-z0-9\-']+", text)
166
  sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
 
228
  return corrected
229
 
230
 
231
+ # ---------- Memory management UI helpers (same as before) ----------
232
  def import_memory_file(uploaded):
233
  global memory
234
  if not uploaded:
 
313
  return "Memory cleared."
314
 
315
 
316
+ def view_memory(limit=4000):
317
  w = memory.get("words", {})
318
  p = memory.get("phrases", {})
319
  out_lines = []
 
455
  MODEL_CACHE = {}
456
 
457
 
458
def get_whisper_model(name, device=None):
    """Return a cached Whisper model, loading it on first use.

    Args:
        name: Model name passed to ``whisper.load_model`` (e.g. "small").
        device: Optional device string forwarded to ``whisper.load_model``.
            When falsy, ``load_model`` is called without a device argument.

    Returns:
        The model object stored in ``MODEL_CACHE`` for this name/device.
    """
    # Cache per (name, device): keying on the name alone would hand back a
    # model loaded for a different device once the caller switches devices.
    # Calls without a device keep the plain-name key used previously.
    cache_key = (name, device) if device else name
    if cache_key not in MODEL_CACHE:
        print(f"DEBUG: loading whisper model '{name}'", flush=True)
        if device:
            MODEL_CACHE[cache_key] = whisper.load_model(name, device=device)
        else:
            MODEL_CACHE[cache_key] = whisper.load_model(name)
    return MODEL_CACHE[cache_key]
466
 
467
 
468
+ # ---------- ZIP extraction + selection helpers ----------
469
def extract_zip_list(zip_file, zip_password):
    """Extract the supported audio entries of a ZIP into a temp directory.

    The target directory is ``<system temp dir>/extracted_audio``; it is
    wiped (best effort) and recreated on every call, so paths returned by a
    previous call become invalid.

    Args:
        zip_file: Path of the archive to open.
        zip_password: Optional password string; encoded and set on the
            archive when truthy.

    Returns:
        ``(paths, diagnostics)`` where ``paths`` is the list of extracted
        file paths (empty on failure or when nothing matched) and
        ``diagnostics`` is a newline-joined log of what happened.
    """
    audio_exts = (".mp3", ".wav", ".aac", ".flac", ".ogg", ".m4a", ".dat", ".dct")
    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
    extracted = []
    logs = []
    try:
        # Best-effort removal of a previous extraction; a missing directory
        # or a failed delete must not abort the new extraction.
        shutil.rmtree(temp_extract_dir, ignore_errors=True)
        os.makedirs(temp_extract_dir, exist_ok=True)

        with pyzipper.ZipFile(zip_file, "r") as zf:
            if zip_password:
                try:
                    zf.setpassword(zip_password.encode())
                except Exception:
                    logs.append("Warning: failed to set zip password (unexpected).")

            for info in zf.infolist():
                if info.is_dir():
                    continue
                _, ext = os.path.splitext(info.filename)
                if ext.lower() not in audio_exts:
                    continue
                try:
                    # extract() returns the path it actually wrote. Member
                    # names are sanitised on extraction, so rebuilding the
                    # path from the raw member name can miss the file.
                    target = zf.extract(info, path=temp_extract_dir)
                except RuntimeError as e:
                    logs.append(f"Password required/incorrect for {info.filename}: {e}")
                    continue
                except pyzipper.BadZipFile:
                    logs.append(f"Bad zip entry: {info.filename}")
                    continue
                except Exception as e:
                    logs.append(f"Error extracting {info.filename}: {e}")
                    continue
                target = os.path.normpath(target)
                if os.path.exists(target):
                    extracted.append(target)
                    logs.append(f"Extracted: {info.filename}")

        if not extracted:
            logs.append("No supported audio files found in zip.")
        return extracted, "\n".join(logs)
    except Exception as e:
        traceback.print_exc()
        # Keep whatever per-entry diagnostics were gathered before the failure.
        logs.append(f"Extraction failed: {e}")
        return [], "\n".join(logs)
519
+
520
+
521
+ # ---------- Main transcription generator (updated to accept explicit 'selected_paths') ----------
522
  def transcribe_multiple(
523
+ selected_paths,
524
  model_name,
525
  advanced_options,
526
  merge_checkbox,
 
 
527
  enable_memory=False,
528
+ device=None,
529
  ):
530
+ """
531
+ Generator yields (log_text, transcripts_text, merged_file_path_or_None, percent_int)
532
+ selected_paths: list of absolute file paths to process
533
+ """
534
  log = []
535
  transcripts = []
536
  word_file_path = None
 
 
 
 
 
 
 
 
 
 
 
537
 
538
+ if not selected_paths:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  log.append("No audio files provided.")
540
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
541
  return
542
 
543
+ yield "", "", None, 0
544
+
545
+ # load model
546
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
547
  try:
548
+ model = get_whisper_model(model_name, device=device)
549
  log.append(f"Loaded Whisper model: {model_name}")
550
  except Exception as e:
551
  log.append(f"Failed to load model {model_name}: {e}")
552
  yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
553
  return
554
 
555
+ total = len(selected_paths)
556
+ for idx, p in enumerate(selected_paths, start=1):
 
 
557
  log.append(f"Processing file ({idx}/{total}): {p}")
558
  yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx - 1) * 80 / max(1, total))
559
 
 
622
 
623
  yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
 
626
+ # ----------------------- Gradio callbacks & UI -----------------------
627
def extract_zip_for_ui(zip_file, zip_password, use_default_zip_pass, default_zip_password):
    """Resolve the uploaded ZIP and its password, then extract it.

    Args:
        zip_file: The upload value: a path string / ``os.PathLike``, an
            object exposing a ``name`` attribute, or a dict with a ``name``
            (or ``path``) entry.
        zip_password: Password typed by the user; may be empty or ``None``.
        use_default_zip_pass: When true and ``zip_password`` is blank,
            ``default_zip_password`` is used instead.
        default_zip_password: Fallback password.

    Returns:
        ``(choices, logs)``: the list of extracted file paths (used as-is
        for the checkbox choices) and a diagnostics string.
    """
    if not zip_file:
        return [], "No ZIP file provided."

    # Normalize the upload value to a filesystem path.
    if isinstance(zip_file, (str, os.PathLike)):
        zip_path = str(zip_file)
    elif isinstance(zip_file, dict):
        zip_path = zip_file.get("name") or zip_file.get("path")
    else:
        zip_path = getattr(zip_file, "name", None)
    if not zip_path:
        # Covers unknown types as well as uploads whose name is empty/None,
        # which would otherwise reach the extractor as a bogus path.
        return [], "Unable to determine uploaded zip path."

    # The explicit password wins unless it is blank and the default is enabled.
    has_override = bool(zip_password and zip_password.strip())
    if use_default_zip_pass and not has_override:
        final_zip_password = default_zip_password
    else:
        final_zip_password = zip_password

    extracted, logs = extract_zip_list(zip_path, final_zip_password)
    return extracted, logs or "Extraction completed."
652
+
653
+
654
def run_transcription_ui(selected_files, file_input, model_name, merge, zip_selected_files, zip_file, zip_password, use_default_zip_pass, default_zip_password, enable_memory, device_choice):
    """Handler for the Transcribe button; streams progress to the UI.

    Files to process are chosen in this priority order:
      1. ``zip_selected_files``: extracted files ticked in the checkbox group
      2. ``file_input``: directly uploaded audio file paths
      3. ``zip_file``: when nothing else is chosen, extract the whole
         archive and transcribe everything found

    ``selected_files`` is accepted for signature compatibility but is not
    read. ``device_choice`` of "auto" (or empty) leaves the device unset.

    Yields:
        ``(log_text, transcripts_text, merged_file_path_or_None, percent)``
        tuples as produced by ``transcribe_multiple``. Any exception is
        reported as a final tuple with percent 100.
    """
    try:
        extraction_logs = ""

        if zip_selected_files:
            if isinstance(zip_selected_files, (list, tuple)):
                final_paths = list(zip_selected_files)
            else:
                final_paths = [zip_selected_files]
        elif isinstance(file_input, (list, tuple)):
            # file_input uses type="filepath", so entries are paths
            final_paths = [str(entry) for entry in file_input if entry]
        elif isinstance(file_input, str) and file_input:
            final_paths = [file_input]
        else:
            final_paths = []

        if not final_paths and zip_file:
            # Reuse the Extract-button logic instead of duplicating the
            # password/path resolution, and keep its diagnostics so a failed
            # extraction is visible in the Logs box.
            final_paths, extraction_logs = extract_zip_for_ui(
                zip_file, zip_password, use_default_zip_pass, default_zip_password
            )

        # 'cpu' or 'cuda' is passed through; "auto" means no explicit device
        device = device_choice if device_choice and device_choice != "auto" else None

        for logs_text, transcripts_text, word_path, percent in transcribe_multiple(
            final_paths,
            model_name,
            {},
            merge_checkbox=merge,
            enable_memory=enable_memory,
            device=device,
        ):
            if extraction_logs:
                logs_text = f"{extraction_logs}\n\n{logs_text}" if logs_text else extraction_logs
            yield logs_text, transcripts_text, word_path, percent
    except Exception:
        tb = traceback.format_exc()
        logs_text = f"EXCEPTION in run_transcription_ui:\n{tb}"
        transcripts_text = "ERROR: transcription did not start or failed unexpectedly."
        yield logs_text, transcripts_text, None, 100
719
 
720
 
721
+ # Build UI (Tabs)
722
  print("DEBUG: building Gradio Blocks", flush=True)
723
with gr.Blocks(title="Whisper Transcriber — Multi-tab") as demo:
    gr.Markdown(
        "<h2>Whisper Transcriber</h2>"
        "<p>Upload audio files or a ZIP, extract and choose files, then transcribe.</p>",
    )

    with gr.Tabs():
        # ---------------- Transcribe Tab ----------------
        with gr.TabItem("Transcribe"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Inputs")

                    file_input = gr.File(label="Audio files (optional)", file_count="multiple", type="filepath", height=80)
                    zip_input = gr.File(label="ZIP with audio (optional)", file_count="single", type="filepath", height=80)

                    with gr.Row():
                        zip_password = gr.Textbox(label="ZIP password (override)", placeholder="Optional")
                        use_default_zip_pass = gr.Checkbox(label="Use default ZIP password", value=False)
                        default_zip_password = gr.Textbox(label="Default ZIP password", value="", interactive=True)

                    model_select = gr.Dropdown(choices=["small", "medium", "large", "base"], value="small", label="Whisper model")
                    device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device (auto tries default)")

                    merge_checkbox = gr.Checkbox(label="Merge all transcripts into one .docx", value=True)
                    memory_checkbox = gr.Checkbox(label="Enable correction memory", value=False)

                    gr.Markdown("### ZIP extraction & file selection")
                    extract_btn = gr.Button("Extract ZIP & List Files")
                    extracted_files_check = gr.CheckboxGroup(choices=[], label="Select extracted files to transcribe (optional)", interactive=True)
                    extract_logs = gr.Textbox(label="Extraction logs", interactive=False, lines=6)

                    # action buttons
                    transcribe_btn = gr.Button("Transcribe Selected / Uploaded")
                with gr.Column(scale=1):
                    gr.Markdown("### Output")
                    transcripts_out = gr.Textbox(label="Transcript", lines=20, interactive=False)
                    progress_num = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Progress (%)", interactive=False)
                    download_file = gr.File(label="Merged .docx (when available)")
                    logs = gr.Textbox(label="Logs", lines=12, interactive=False)

            # Wire extract button
            def _extract_click(zip_file, zip_password, use_default_zip_pass, default_zip_password):
                """Extract the ZIP and offer its audio files as checkbox choices."""
                choices, logstxt = extract_zip_for_ui(zip_file, zip_password, use_default_zip_pass, default_zip_password)
                # Returning the bare list would set the group's *value* (its
                # selection), not its choices. Update the choices explicitly
                # and clear any selection left over from a previous archive.
                return gr.update(choices=choices, value=[]), logstxt

            extract_btn.click(
                fn=_extract_click,
                inputs=[zip_input, zip_password, use_default_zip_pass, default_zip_password],
                outputs=[extracted_files_check, extract_logs],
            )

            # Wire transcribe button. The inputs are positional and must match
            # run_transcription_ui's parameter order exactly.
            transcribe_btn.click(
                fn=run_transcription_ui,
                inputs=[
                    extracted_files_check,  # selected_files (not read by the handler)
                    file_input,  # file_input
                    model_select,
                    merge_checkbox,
                    extracted_files_check,  # zip_selected_files
                    zip_input,  # zip_file, so the extract-all fallback is possible
                    zip_password,
                    use_default_zip_pass,
                    default_zip_password,
                    memory_checkbox,
                    device_choice,
                ],
                outputs=[logs, transcripts_out, download_file, progress_num],
            )

        # ---------------- Memory Tab ----------------
        with gr.TabItem("Memory"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Memory Tools")
                    mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="filepath")
                    mem_import_btn = gr.Button("Import Memory File")
                    mem_manual_entry = gr.Textbox(label="Add word/phrase to memory (manual)", placeholder="Type a word or phrase")
                    mem_add_btn = gr.Button("Add to Memory")
                    mem_clear_btn = gr.Button("Clear Memory")
                    mem_view_btn = gr.Button("View Memory")
                    mem_status = gr.Textbox(label="Memory status", interactive=False, lines=12)

            # memory bindings
            def _import_mem(uploaded):
                """Forward the uploaded memory file to import_memory_file."""
                return import_memory_file(uploaded)

            mem_import_btn.click(fn=_import_mem, inputs=[mem_upload], outputs=[mem_status])
            mem_add_btn.click(fn=add_memory_entry, inputs=[mem_manual_entry], outputs=[mem_status])
            mem_clear_btn.click(fn=lambda: clear_memory(), inputs=[], outputs=[mem_status])
            mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])

        # ---------------- Settings Tab ----------------
        with gr.TabItem("Settings"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Settings")
                    gr.Markdown("- Use `Device` in Transcribe tab to force CPU/GPU. Default uses whisper's choice.")
                    gr.Markdown("- `Default ZIP password` is empty by default for safety.")
                    gr.Markdown("- If you want extracted-file preview before transcribing, click **Extract ZIP & List Files** first.")
                with gr.Column():
                    gr.Markdown("### Diagnostics")
                    diag_btn = gr.Button("Show memory summary")
                    diag_out = gr.Textbox(label="Diagnostics output", interactive=False, lines=12)

            diag_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[diag_out])

    # end tabs
830
+
831
+ # ---------- Launch ----------
832
  if __name__ == "__main__":
833
  port = int(os.environ.get("PORT", 7860))
834
  print("DEBUG: launching Gradio on port", port, flush=True)