staraks committed on
Commit
bf79b27
·
verified ·
1 Parent(s): cf38d7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +292 -44
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Whisper Transcriber — Full improved app.py with Dark/Light toggle
3
  # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
4
 
5
  import os
@@ -12,7 +12,9 @@ import traceback
12
  import threading
13
  import re
14
  from difflib import get_close_matches
 
15
  from pathlib import Path
 
16
 
17
  # Force unbuffered prints for logs
18
  os.environ["PYTHONUNBUFFERED"] = "1"
@@ -386,19 +388,19 @@ def segments_to_srt(segments):
386
  return "\n".join(lines)
387
 
388
 
389
- # ---------- ZIP extraction + mapping for UI ----------
390
  def extract_zip_and_map(zip_path, zip_password=None):
 
 
 
 
391
  global EXTRACT_MAP
392
  EXTRACT_MAP = {}
393
- temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
 
 
394
  try:
395
- if os.path.exists(temp_extract_dir):
396
- try:
397
- shutil.rmtree(temp_extract_dir)
398
- except Exception:
399
- pass
400
  os.makedirs(temp_extract_dir, exist_ok=True)
401
- logs = []
402
  with pyzipper.ZipFile(zip_path, "r") as zf:
403
  if zip_password:
404
  try:
@@ -442,53 +444,251 @@ def extract_zip_and_map(zip_path, zip_password=None):
442
  return friendly, "\n".join(logs)
443
  except Exception as e:
444
  traceback.print_exc()
 
 
 
 
 
 
445
  return [], f"Extraction failed: {e}"
446
 
447
 
448
- # ---------- Single-file transcribe (with SRT option) ----------
449
- def transcribe_single_file(path, model_name="small", device_choice="auto", enable_memory=False, generate_srt=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  logs = []
451
  try:
452
  if not path:
453
- return None, "", "No file provided."
454
  p = path.name if hasattr(path, "name") else str(path)
455
  device = None if device_choice == "auto" else device_choice
456
- model = get_whisper_model(model_name, device=device)
457
- logs.append(f"Loaded model: {model_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  wav = convert_to_wav_if_needed(p)
459
  logs.append(f"Converted to WAV: {os.path.basename(wav)}")
460
- result = model.transcribe(wav)
461
- text = result.get("text", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  if enable_memory:
463
- text = memory_correct_text(text)
464
- text = postprocess_transcript(text)
 
 
 
 
 
 
 
465
  srt_path = None
466
- if generate_srt and result.get("segments"):
467
- srt_text = segments_to_srt(result["segments"])
468
- srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
 
 
 
469
  with open(srt_fp, "w", encoding="utf-8") as fh:
470
  fh.write(srt_text)
471
  srt_path = srt_fp
472
  logs.append(f"SRT generated: {srt_path}")
473
- if enable_memory:
474
- try:
475
- update_memory_with_transcript(text)
476
- logs.append("Memory updated.")
477
- except Exception:
478
- pass
479
  if wav and os.path.exists(wav) and wav != p:
480
  try:
481
  os.unlink(wav)
482
  except Exception:
483
  pass
484
- return text, srt_path, "\n".join(logs)
 
 
485
  except Exception as e:
486
  tb = traceback.format_exc()
487
  return "", None, f"Transcription error: {e}\n{tb}"
488
 
489
 
490
- # ---------- Batch transcribe (maps friendly names to real paths) ----------
491
- def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt):
492
  logs = []
493
  transcripts = []
494
  srt_files = []
@@ -513,7 +713,17 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
513
  total = len(paths)
514
  for idx, p in enumerate(paths, start=1):
515
  logs.append(f"[{idx}/{total}] Processing: {p}")
516
- text, srt_path, lg = transcribe_single_file(p, model_name=model_name, device_choice=device_name, enable_memory=enable_mem, generate_srt=generate_srt)
 
 
 
 
 
 
 
 
 
 
517
  logs.append(lg)
518
  transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
519
  if srt_path:
@@ -561,7 +771,7 @@ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui,
561
  .small-note { color:var(--muted); font-size:12px;}
562
  """
563
 
564
- with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
565
  # apply saved theme early
566
  gr.HTML("""
567
  <script>
@@ -585,7 +795,7 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
585
  gr.HTML("<div class='app-icon'>WT</div>")
586
  with gr.Column():
587
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
588
- gr.Markdown("<div class='small-note'>Per-file selection after unzip, SRT export, model availability checks, dark/light toggle.</div>")
589
 
590
  with gr.Tabs():
591
  # Single Audio Tab
@@ -601,6 +811,10 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
601
  with gr.Row():
602
  mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
603
  srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
 
 
 
 
604
  transcribe_btn = gr.Button("Transcribe", variant="primary")
605
  with gr.Column(scale=1):
606
  with gr.Group(elem_classes="card"):
@@ -610,15 +824,29 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
610
  srt_download = gr.File(label="SRT (if generated / available)")
611
  single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
612
 
613
- def _single_action(audio_file, model_name, device, mem_on, srt_on):
614
  if not audio_file:
615
  return None, "", None, "No audio file provided."
616
  path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
617
- text, srt_path, logs = transcribe_single_file(path, model_name=model_name, device_choice=device, enable_memory=mem_on, generate_srt=srt_on)
 
 
 
 
 
 
 
 
 
 
618
  preview = audio_file
619
  return preview, text, srt_path, logs
620
 
621
- transcribe_btn.click(fn=_single_action, inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle], outputs=[audio_preview, transcript_out, srt_download, single_logs])
 
 
 
 
622
 
623
  # Batch Tab
624
  with gr.TabItem("Batch Transcribe"):
@@ -638,6 +866,10 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
638
  batch_merge = gr.Checkbox(label="Merge transcripts to DOCX", value=True)
639
  batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
640
  batch_srt = gr.Checkbox(label="Generate SRT(s) if available", value=False)
 
 
 
 
641
  batch_run_btn = gr.Button("Start Batch Transcription", variant="primary")
642
  with gr.Column(scale=1):
643
  with gr.Group(elem_classes="card"):
@@ -649,18 +881,34 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
649
 
650
  def _do_extract(zip_file, password):
651
  if not zip_file:
652
- return [], "No ZIP provided."
653
  zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
654
  friendly, logs = extract_zip_and_map(zip_path, password)
655
- return friendly, logs
 
656
 
657
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
658
 
659
- def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag):
660
- combined, logs, out_doc, srt_path = batch_transcribe(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag)
 
 
 
 
 
 
 
 
 
 
 
661
  return combined, logs, out_doc, srt_path
662
 
663
- batch_run_btn.click(fn=_do_batch, inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt], outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download])
 
 
 
 
664
 
665
  # Memory Tab
666
  with gr.TabItem("Memory"):
@@ -761,13 +1009,13 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
761
  with gr.Group(elem_classes="card"):
762
  gr.Markdown("### Runtime & tips")
763
  gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
764
- gr.Markdown("- Extraction writes to system temp `extracted_audio`. Re-extracting overwrites it.")
765
- gr.Markdown("- Provide `fine_tune.py` if you plan to use the Fine-tune workflow.")
766
  with gr.Column():
767
  with gr.Group(elem_classes="card"):
768
  gr.Markdown("### Theme")
769
  theme_toggle = gr.Button("Toggle Dark / Light Theme")
770
- theme_note = gr.Markdown("Theme preference is saved in your browser (localStorage).")
771
  gr.Markdown("### Diagnostics")
772
  diag_btn = gr.Button("Show memory summary")
773
  diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)
 
1
  # app.py
2
+ # Whisper Transcriber — Fixed: per-run extract dirs + CheckboxGroup update + misc imports
3
  # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
4
 
5
  import os
 
12
  import threading
13
  import re
14
  from difflib import get_close_matches
15
+ from uuid import uuid4
16
  from pathlib import Path
17
+ from difflib import get_close_matches
18
 
19
  # Force unbuffered prints for logs
20
  os.environ["PYTHONUNBUFFERED"] = "1"
 
388
  return "\n".join(lines)
389
 
390
 
391
+ # ---------- ZIP extraction + mapping for UI (per-run temp dir) ----------
392
  def extract_zip_and_map(zip_path, zip_password=None):
393
+ """
394
+ Extract to a unique per-run temp directory and populate EXTRACT_MAP with absolute paths.
395
+ Returns (friendly_list, logs)
396
+ """
397
  global EXTRACT_MAP
398
  EXTRACT_MAP = {}
399
+ run_id = uuid4().hex
400
+ temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
401
+ logs = []
402
  try:
 
 
 
 
 
403
  os.makedirs(temp_extract_dir, exist_ok=True)
 
404
  with pyzipper.ZipFile(zip_path, "r") as zf:
405
  if zip_password:
406
  try:
 
444
  return friendly, "\n".join(logs)
445
  except Exception as e:
446
  traceback.print_exc()
447
+ # on failure, attempt cleanup
448
+ try:
449
+ if os.path.exists(temp_extract_dir):
450
+ shutil.rmtree(temp_extract_dir)
451
+ except Exception:
452
+ pass
453
  return [], f"Extraction failed: {e}"
454
 
455
 
456
+ # ---------- Audio trimming helper for two-pass ----------
457
def trim_audio_segment(src_path, start_sec, end_sec, timeout=30):
    """
    Cut the span [start_sec, end_sec] out of an audio file into a temp WAV.

    ffmpeg is invoked with input seeking (-ss/-to before -i) and the result
    is resampled to 16 kHz mono.

    Args:
        src_path: source audio file (anything str() turns into a path).
        start_sec: segment start, in seconds.
        end_sec: segment end, in seconds.
        timeout: seconds to wait for ffmpeg before giving up (default 30,
            the value that used to be hard-coded).

    Returns:
        Path of the newly created WAV file. The caller owns it and is
        responsible for deleting it.

    Raises:
        RuntimeError: ffmpeg exited non-zero, or the output is missing or
            smaller than MIN_WAV_SIZE.
        subprocess.TimeoutExpired / OSError: ffmpeg hung or could not be run.
        In every failure case the temp file is removed before re-raising.
    """
    src = str(src_path)

    # Reserve a unique output name; the handle is closed right away so that
    # ffmpeg can overwrite the file (-y) on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_tmp:
        out_path = out_tmp.name

    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-ss",
        str(start_sec),
        "-to",
        str(end_sec),
        "-i",
        src,
        "-ar",
        "16000",
        "-ac",
        "1",
        out_path,
    ]

    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=timeout, text=True)
        # MIN_WAV_SIZE is a module-level constant defined elsewhere in this file.
        if proc.returncode != 0 or not os.path.exists(out_path) or os.path.getsize(out_path) < MIN_WAV_SIZE:
            raise RuntimeError(f"ffmpeg trim failed: {proc.stderr or proc.stdout}")
        return out_path
    except Exception:
        # Single cleanup point for every failure mode (bad exit code, timeout,
        # ffmpeg missing): never leave a half-written temp file behind.
        try:
            os.unlink(out_path)
        except OSError:
            pass
        raise
501
+
502
+
503
+ # ---------- Transcribe single file (supports two-pass) ----------
504
def _unlink_quietly(file_path, keep=None):
    """Best-effort removal of a temp file; skips `keep` and never raises."""
    if not file_path or file_path == keep:
        return
    try:
        if os.path.exists(file_path):
            os.unlink(file_path)
    except OSError:
        pass


def _write_srt_file(segments, source_path, logs, suffix=""):
    """Render `segments` to '<source stem><suffix>.srt' in the temp dir and return its path."""
    srt_text = segments_to_srt(segments)
    stem = os.path.splitext(os.path.basename(source_path))[0]
    srt_fp = os.path.join(tempfile.gettempdir(), f"{stem}{suffix}.srt")
    with open(srt_fp, "w", encoding="utf-8") as fh:
        fh.write(srt_text)
    logs.append(f"SRT generated: {srt_fp}")
    return srt_fp


def _update_memory_best_effort(text, logs):
    """Feed `text` to the memory store; a failure is logged, never raised."""
    try:
        update_memory_with_transcript(text)
        logs.append("Memory updated.")
    except Exception as e:
        # A memory-store problem must not cost the user a finished transcript.
        logs.append(f"Memory update failed: {e}")


def transcribe_single_file(
    path,
    model_name="small",
    device_choice="auto",
    enable_memory=False,
    generate_srt=False,
    use_two_pass=False,
    fast_model="small",
    refine_model=None,
    refine_threshold=-1.0,
):
    """
    Transcribe one audio file, optionally with a fast-then-refine two-pass flow.

    Single pass (use_two_pass False): transcribe with `model_name`, apply
    memory corrections when enabled, post-process, optionally write an SRT.

    Two pass (use_two_pass True):
      1) transcribe with `fast_model`
      2) apply memory corrections per segment
      3) re-transcribe every segment whose avg_logprob < refine_threshold with
         `refine_model` (defaults to `model_name`) on a trimmed audio clip
      4) join the segments, apply memory, post-process, optionally write an SRT

    Args:
        path: file path, or an object exposing the path as `.name`.
        model_name: model for the single pass / default refine model.
        device_choice: "auto" lets the model loader pick the device.
        enable_memory: apply memory corrections and update the memory store.
        generate_srt: also write an .srt file into the system temp directory.
        use_two_pass: switch to the two-pass flow.
        fast_model: model used for the first pass of the two-pass flow.
        refine_model: model used for refinement; None means `model_name`.
        refine_threshold: avg_logprob cut-off; None or a non-numeric value
            falls back to -1.0.

    Returns:
        (text, srt_path, logs). With no `path`: (None, None, "No file provided.").
        On any error: ("", None, "Transcription error: ...") - this function
        does not raise.
    """
    logs = []
    p = None
    wav = None
    try:
        if not path:
            return None, None, "No file provided."
        p = path.name if hasattr(path, "name") else str(path)
        device = None if device_choice == "auto" else device_choice

        # ---------------- Single-pass flow ----------------
        if not use_two_pass:
            model = get_whisper_model(model_name, device=device)
            logs.append(f"Loaded model: {model_name}")
            wav = convert_to_wav_if_needed(p)
            logs.append(f"Converted to WAV: {os.path.basename(wav)}")
            result = model.transcribe(wav)
            text = result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
            text = postprocess_transcript(text)
            srt_path = None
            if generate_srt and result.get("segments"):
                srt_path = _write_srt_file(result["segments"], p, logs)
            if enable_memory:
                _update_memory_best_effort(text, logs)
            return text, srt_path, "\n".join(logs)

        # ---------------- Two-pass flow ----------------
        refine_name = model_name if refine_model is None else refine_model
        logs.append(f"Two-pass enabled: fast_model={fast_model}, refine_model={refine_name}, threshold={refine_threshold}")

        # Coerce once up front: a cleared UI number field arrives as None and
        # would otherwise silently disable refinement for every segment.
        try:
            threshold = float(refine_threshold)
        except (TypeError, ValueError):
            threshold = -1.0
            logs.append(f"Invalid refine threshold {refine_threshold!r}; using {threshold}.")

        # 1) fast pass
        fast = get_whisper_model(fast_model, device=device)
        logs.append(f"Loaded fast model: {fast_model}")
        wav = convert_to_wav_if_needed(p)
        logs.append(f"Converted to WAV: {os.path.basename(wav)}")

        fast_result = fast.transcribe(wav)
        segments = fast_result.get("segments") or []

        # Fallback: no segments -> treat the result as one plain text.
        if not segments:
            text = fast_result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
                _update_memory_best_effort(text, logs)
            text = postprocess_transcript(text)
            if generate_srt:
                # An SRT needs timed segments, and there are none here.
                logs.append("SRT skipped: no segments in transcription result.")
            return text, None, "\n".join(logs)

        # 2) memory-correct segments and collect the low-confidence ones
        refined_segments = []
        low_confidence = []
        for seg in segments:
            seg_text = seg.get("text", "").strip()
            seg_copy = dict(seg)
            seg_copy["text"] = memory_correct_text(seg_text) if enable_memory else seg_text
            refined_segments.append(seg_copy)
            avg_lp = seg.get("avg_logprob")
            if avg_lp is None:
                continue
            try:
                needs_refine = float(avg_lp) < threshold
            except (TypeError, ValueError):
                continue
            if needs_refine:
                low_confidence.append(seg_copy)

        logs.append(f"Fast pass produced {len(segments)} segments; {len(low_confidence)} queued for refinement.")

        # 3) refine low-confidence segments
        if low_confidence:
            refine = get_whisper_model(refine_name, device=device)
            logs.append(f"Loaded refine model: {refine_name}")
            for seg in low_confidence:
                start = seg.get("start", 0.0)
                end = seg.get("end", start + seg.get("duration", 0.0))
                if end <= start:
                    continue
                seg_wav = None
                try:
                    seg_wav = trim_audio_segment(wav, start, end)
                    r_result = refine.transcribe(seg_wav)
                    new_text = r_result.get("text", "").strip()
                    if enable_memory:
                        new_text = memory_correct_text(new_text)
                    # `seg` is the very dict stored in refined_segments, so it
                    # is updated in place; no lookup by timestamps is needed.
                    seg["text"] = new_text
                    r_segments = r_result.get("segments")
                    if r_segments:
                        seg["avg_logprob"] = r_segments[0].get("avg_logprob", seg.get("avg_logprob"))
                except Exception as e:
                    # One bad segment keeps its fast-pass text; carry on.
                    logs.append(f"Refine failed for segment {start}-{end}: {e}")
                finally:
                    # Remove the clip even when the refine pass raised.
                    _unlink_quietly(seg_wav)

        # 4) recombine segments in timeline order
        ordered = sorted(refined_segments, key=lambda s: s.get("start", 0.0))
        parts = [s.get("text", "").strip() for s in ordered]
        combined_text = " ".join(part for part in parts if part)
        if enable_memory:
            combined_text = memory_correct_text(combined_text)
            _update_memory_best_effort(combined_text, logs)
        combined_text = postprocess_transcript(combined_text)

        # 5) generate SRT if requested
        srt_path = None
        if generate_srt:
            srt_segs = [
                {"start": s.get("start", 0.0), "end": s.get("end", 0.0), "text": s.get("text", "")}
                for s in ordered
            ]
            srt_path = _write_srt_file(srt_segs, p, logs, suffix="_two_pass")

        return combined_text, srt_path, "\n".join(logs)

    except Exception as e:
        tb = traceback.format_exc()
        return "", None, f"Transcription error: {e}\n{tb}"
    finally:
        # Runs on success and on failure, so the converted WAV is never
        # leaked; the caller's own file (wav == p) is left untouched.
        _unlink_quietly(wav, keep=p)
688
 
689
 
690
+ # ---------- Batch transcribe (uses transcribe_single_file's two-pass when requested) ----------
691
+ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
692
  logs = []
693
  transcripts = []
694
  srt_files = []
 
713
  total = len(paths)
714
  for idx, p in enumerate(paths, start=1):
715
  logs.append(f"[{idx}/{total}] Processing: {p}")
716
+ text, srt_path, lg = transcribe_single_file(
717
+ p,
718
+ model_name=model_name,
719
+ device_choice=device_name,
720
+ enable_memory=enable_mem,
721
+ generate_srt=generate_srt,
722
+ use_two_pass=use_two_pass,
723
+ fast_model=fast_model,
724
+ refine_model=model_name,
725
+ refine_threshold=refine_threshold,
726
+ )
727
  logs.append(lg)
728
  transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
729
  if srt_path:
 
771
  .small-note { color:var(--muted); font-size:12px;}
772
  """
773
 
774
+ with gr.Blocks(title="Whisper Transcriber (dark/light + two-pass)", css=CSS) as demo:
775
  # apply saved theme early
776
  gr.HTML("""
777
  <script>
 
795
  gr.HTML("<div class='app-icon'>WT</div>")
796
  with gr.Column():
797
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
798
+ gr.Markdown("<div class='small-note'>Two-pass speedup, per-file selection after unzip, SRT export, model availability checks, dark/light toggle.</div>")
799
 
800
  with gr.Tabs():
801
  # Single Audio Tab
 
811
  with gr.Row():
812
  mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
813
  srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
814
+ with gr.Row():
815
+ use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
816
+ fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
817
+ refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob) — lower is stricter", precision=2)
818
  transcribe_btn = gr.Button("Transcribe", variant="primary")
819
  with gr.Column(scale=1):
820
  with gr.Group(elem_classes="card"):
 
824
  srt_download = gr.File(label="SRT (if generated / available)")
825
  single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
826
 
827
def _single_action(audio_file, model_name, device, mem_on, srt_on, use_two_pass_flag, fast_model, refine_thresh):
    """
    Click handler for the single-file "Transcribe" button.

    Returns (preview, text, srt_path, logs), matching the button's outputs.
    `preview` echoes the uploaded audio back to the preview component.
    """
    if not audio_file:
        return None, "", None, "No audio file provided."

    # The upload may arrive as a plain path or as an object exposing `.name`.
    if isinstance(audio_file, str):
        path = audio_file
    elif hasattr(audio_file, "name"):
        path = audio_file.name
    else:
        path = str(audio_file)

    # A cleared number field / empty dropdown is delivered as None; fall back
    # to the documented defaults instead of passing None downstream.
    if refine_thresh is None:
        refine_thresh = -1.0
    if not fast_model:
        fast_model = "small"

    text, srt_path, logs = transcribe_single_file(
        path,
        model_name=model_name,
        device_choice=device,
        enable_memory=mem_on,
        generate_srt=srt_on,
        use_two_pass=use_two_pass_flag,
        fast_model=fast_model,
        refine_model=model_name,
        refine_threshold=refine_thresh,
    )
    return audio_file, text, srt_path, logs
844
 
845
+ transcribe_btn.click(
846
+ fn=_single_action,
847
+ inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, use_two_pass_single, fast_model_choice, refine_threshold_single],
848
+ outputs=[audio_preview, transcript_out, srt_download, single_logs],
849
+ )
850
 
851
  # Batch Tab
852
  with gr.TabItem("Batch Transcribe"):
 
866
  batch_merge = gr.Checkbox(label="Merge transcripts to DOCX", value=True)
867
  batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
868
  batch_srt = gr.Checkbox(label="Generate SRT(s) if available", value=False)
869
+ with gr.Row():
870
+ batch_use_two_pass = gr.Checkbox(label="Use two-pass speedup", value=False)
871
+ batch_fast_model = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
872
+ batch_refine_threshold = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
873
  batch_run_btn = gr.Button("Start Batch Transcription", variant="primary")
874
  with gr.Column(scale=1):
875
  with gr.Group(elem_classes="card"):
 
881
 
882
  def _do_extract(zip_file, password):
883
  if not zip_file:
884
+ return gr.CheckboxGroup.update(choices=[]), "No ZIP provided."
885
  zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
886
  friendly, logs = extract_zip_and_map(zip_path, password)
887
+ # return a component update so the CheckboxGroup shows new choices reliably
888
+ return gr.CheckboxGroup.update(choices=friendly), logs
889
 
890
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
891
 
892
def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag, use_two_pass_flag, fast_model, refine_thresh):
    """
    Click handler for "Start Batch Transcription".

    Forwards the UI values to batch_transcribe and returns its four results
    (combined transcript, logs, merged document, SRT) unchanged.
    """
    # A cleared number field / empty dropdown is delivered as None; fall back
    # to batch_transcribe's own defaults instead of passing None downstream.
    if refine_thresh is None:
        refine_thresh = -1.0
    if not fast_model:
        fast_model = "small"

    combined, logs, out_doc, srt_path = batch_transcribe(
        friendly_selected,
        uploaded_files,
        model_name,
        device,
        merge_flag,
        mem_flag,
        srt_flag,
        use_two_pass=use_two_pass_flag,
        fast_model=fast_model,
        refine_threshold=refine_thresh,
    )
    return combined, logs, out_doc, srt_path
906
 
907
+ batch_run_btn.click(
908
+ fn=_do_batch,
909
+ inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_threshold],
910
+ outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
911
+ )
912
 
913
  # Memory Tab
914
  with gr.TabItem("Memory"):
 
1009
  with gr.Group(elem_classes="card"):
1010
  gr.Markdown("### Runtime & tips")
1011
  gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
1012
+ gr.Markdown("- Extraction writes to a per-run temp directory under system temp. Re-extracting creates a new run dir.")
1013
+ gr.Markdown("- Two-pass helps on long files where heavy model is costly.")
1014
  with gr.Column():
1015
  with gr.Group(elem_classes="card"):
1016
  gr.Markdown("### Theme")
1017
  theme_toggle = gr.Button("Toggle Dark / Light Theme")
1018
+ gr.Markdown("Theme preference is saved in your browser (localStorage).")
1019
  gr.Markdown("### Diagnostics")
1020
  diag_btn = gr.Button("Show memory summary")
1021
  diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)