staraks committed on
Commit
cf38d7d
·
verified ·
1 Parent(s): 9f68e0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -38
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Improved Whisper Transcriber (per-file selection after unzip, model availability check, SRT export)
3
  # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
4
 
5
  import os
@@ -329,13 +329,11 @@ def convert_to_wav_if_needed(input_path):
329
def whisper_available_models():
    """Return the set of Whisper model names this installation supports.

    Tries the ``whisper.available_models()`` helper that many whisper
    forks expose. Any failure (helper missing, import problem, unexpected
    return type) is deliberately swallowed so startup never breaks:
    we then fall back to the common model set, which we cannot verify
    at startup.

    Returns:
        set[str]: detected model names, or the conservative fallback set.
    """
    try:
        # Many whisper forks expose available_models(); guard the whole
        # probe — a NameError/AttributeError here must not abort startup.
        models = whisper.available_models()
        if isinstance(models, (list, tuple, set)):
            return set(models)
    except Exception:
        # Best-effort probe only; fall through to the static fallback.
        pass
    # Fallback: offer the common set (unverified at startup).
    return {"tiny", "base", "small", "medium", "large", "large-v3"}
340
 
341
 
@@ -343,12 +341,10 @@ AVAILABLE_MODEL_SET = whisper_available_models()
343
 
344
 
345
  def safe_model_choices(prefer_default="small"):
346
- # hide entries not in AVAILABLE_MODEL_SET
347
  base_choices = ["small", "medium", "large", "large-v3", "base", "tiny"]
348
  choices = [m for m in base_choices if m in AVAILABLE_MODEL_SET]
349
  if not choices:
350
- choices = base_choices # if we couldn't detect, still present choices
351
- # ensure prefer_default exists
352
  if prefer_default in choices:
353
  default = prefer_default
354
  else:
@@ -371,12 +367,7 @@ def get_whisper_model(name, device=None):
371
 
372
  # ---------- SRT export ----------
373
  def segments_to_srt(segments):
374
- """
375
- segments: iterable of dicts with 'start','end','text' or whisper segments
376
- returns srt_text
377
- """
378
  def fmt_time(t):
379
- # t in seconds
380
  h = int(t // 3600)
381
  m = int((t % 3600) // 60)
382
  s = int(t % 60)
@@ -391,16 +382,12 @@ def segments_to_srt(segments):
391
  lines.append(str(i))
392
  lines.append(f"{fmt_time(start)} --> {fmt_time(end)}")
393
  lines.append(text)
394
- lines.append("") # blank line
395
  return "\n".join(lines)
396
 
397
 
398
  # ---------- ZIP extraction + mapping for UI ----------
399
  def extract_zip_and_map(zip_path, zip_password=None):
400
- """
401
- Extracts supported audio files into temp dir and builds EXTRACT_MAP mapping friendly basename -> full path.
402
- Returns list of friendly basenames and log string.
403
- """
404
  global EXTRACT_MAP
405
  EXTRACT_MAP = {}
406
  temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
@@ -437,12 +424,9 @@ def extract_zip_and_map(zip_path, zip_password=None):
437
  fullp = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
438
  if not os.path.exists(fullp):
439
  continue
440
- # friendly basename (avoid collisions)
441
  base = os.path.basename(info.filename)
442
- # if collision, append suffix
443
  key = base
444
  if key in EXTRACT_MAP:
445
- # create unique by adding index
446
  idx = count.get(base, 1) + 1
447
  count[base] = idx
448
  name_only, extn = os.path.splitext(base)
@@ -454,7 +438,6 @@ def extract_zip_and_map(zip_path, zip_password=None):
454
  if not EXTRACT_MAP:
455
  logs.append("No supported audio files found in ZIP.")
456
  return [], "\n".join(logs)
457
- # return sorted friendly names
458
  friendly = sorted(EXTRACT_MAP.keys())
459
  return friendly, "\n".join(logs)
460
  except Exception as e:
@@ -468,14 +451,12 @@ def transcribe_single_file(path, model_name="small", device_choice="auto", enabl
468
  try:
469
  if not path:
470
  return None, "", "No file provided."
471
- # normalize path if it's a file-like dict
472
  p = path.name if hasattr(path, "name") else str(path)
473
  device = None if device_choice == "auto" else device_choice
474
  model = get_whisper_model(model_name, device=device)
475
  logs.append(f"Loaded model: {model_name}")
476
  wav = convert_to_wav_if_needed(p)
477
  logs.append(f"Converted to WAV: {os.path.basename(wav)}")
478
- # call whisper transcribe
479
  result = model.transcribe(wav)
480
  text = result.get("text", "").strip()
481
  if enable_memory:
@@ -484,7 +465,6 @@ def transcribe_single_file(path, model_name="small", device_choice="auto", enabl
484
  srt_path = None
485
  if generate_srt and result.get("segments"):
486
  srt_text = segments_to_srt(result["segments"])
487
- # save srt in temp dir
488
  srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
489
  with open(srt_fp, "w", encoding="utf-8") as fh:
490
  fh.write(srt_text)
@@ -496,7 +476,6 @@ def transcribe_single_file(path, model_name="small", device_choice="auto", enabl
496
  logs.append("Memory updated.")
497
  except Exception:
498
  pass
499
- # cleanup intermediate wav if created
500
  if wav and os.path.exists(wav) and wav != p:
501
  try:
502
  os.unlink(wav)
@@ -515,7 +494,6 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
515
  srt_files = []
516
  out_doc = None
517
  paths = []
518
- # selected from zip (friendly names)
519
  if friendly_selected:
520
  for key in friendly_selected:
521
  p = EXTRACT_MAP.get(key)
@@ -523,7 +501,6 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
523
  paths.append(p)
524
  else:
525
  logs.append(f"Warning: selected file not found in extract map: {key}")
526
- # uploaded files
527
  if uploaded_files:
528
  if isinstance(uploaded_files, (list, tuple)):
529
  for f in uploaded_files:
@@ -548,7 +525,6 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
548
  logs.append(f"Merged transcript saved: {out_doc}")
549
  except Exception as e:
550
  logs.append(f"Merge failed: {e}")
551
- # if multiple SRTs, if desired we could zip them; here we just return first SRT if any
552
  srt_return = srt_files[0] if srt_files else None
553
  return combined, "\n".join(logs), out_doc, srt_return
554
 
@@ -559,22 +535,57 @@ print("DEBUG: building Gradio UI", flush=True)
559
  available_choices, default_choice = safe_model_choices(prefer_default="small")
560
 
561
  CSS = """
562
- :root{ --accent:#4f46e5; --muted:#6b7280; --card:#ffffff; --bg:#f7f8fb; }
563
- body { background: var(--bg); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  .header { padding: 14px; border-radius: 10px; background: linear-gradient(90deg, rgba(79,70,229,0.08), rgba(99,102,241,0.02)); margin-bottom: 12px; display:flex;align-items:center;gap:12px; }
565
  .app-icon { width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,var(--accent),#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px; }
566
  .card { background:var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(16,24,40,0.04); }
567
- .transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background:#0f172a; color:#e6eef8; padding:12px; border-radius:8px; min-height:200px; }
568
  .small-note { color:var(--muted); font-size:12px;}
569
  """
570
 
571
- with gr.Blocks(title="Whisper Transcriber (improved)", css=CSS) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572
  with gr.Row(elem_classes="header"):
573
  with gr.Column(scale=0):
574
  gr.HTML("<div class='app-icon'>WT</div>")
575
  with gr.Column():
576
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
577
- gr.Markdown("<div class='small-note'>Per-file selection after unzip, SRT export, model availability checks.</div>")
578
 
579
  with gr.Tabs():
580
  # Single Audio Tab
@@ -604,7 +615,6 @@ with gr.Blocks(title="Whisper Transcriber (improved)", css=CSS) as demo:
604
  return None, "", None, "No audio file provided."
605
  path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
606
  text, srt_path, logs = transcribe_single_file(path, model_name=model_name, device_choice=device, enable_memory=mem_on, generate_srt=srt_on)
607
- # set audio preview to original file
608
  preview = audio_file
609
  return preview, text, srt_path, logs
610
 
@@ -642,7 +652,6 @@ with gr.Blocks(title="Whisper Transcriber (improved)", css=CSS) as demo:
642
  return [], "No ZIP provided."
643
  zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
644
  friendly, logs = extract_zip_and_map(zip_path, password)
645
- # Show friendly names and logs
646
  return friendly, logs
647
 
648
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
@@ -745,7 +754,7 @@ with gr.Blocks(title="Whisper Transcriber (improved)", css=CSS) as demo:
745
  mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
746
  mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
747
 
748
- # Settings Tab
749
  with gr.TabItem("Settings"):
750
  with gr.Row():
751
  with gr.Column():
@@ -756,12 +765,34 @@ with gr.Blocks(title="Whisper Transcriber (improved)", css=CSS) as demo:
756
  gr.Markdown("- Provide `fine_tune.py` if you plan to use the Fine-tune workflow.")
757
  with gr.Column():
758
  with gr.Group(elem_classes="card"):
 
 
 
759
  gr.Markdown("### Diagnostics")
760
  diag_btn = gr.Button("Show memory summary")
761
  diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)
762
- diag_btn.click(fn=lambda: (lambda: _view_mem())(), inputs=[], outputs=[diag_out])
763
-
764
- # Launch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
  if __name__ == "__main__":
766
  port = int(os.environ.get("PORT", 7860))
767
  print("DEBUG: launching improved Gradio on port", port, flush=True)
 
1
  # app.py
2
+ # Whisper Transcriber Full improved app.py with Dark/Light toggle
3
  # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
4
 
5
  import os
 
329
def whisper_available_models():
    """Return set of model names if whisper provides helper; otherwise conservative fallback."""
    detected = None
    try:
        # Probe the optional helper; any failure leaves `detected` as None.
        detected = whisper.available_models()
    except Exception:
        pass
    if isinstance(detected, (list, tuple, set)):
        return set(detected)
    # Conservative default when detection is unavailable or unusable.
    return set(["tiny", "base", "small", "medium", "large", "large-v3"])
338
 
339
 
 
341
 
342
 
343
  def safe_model_choices(prefer_default="small"):
 
344
  base_choices = ["small", "medium", "large", "large-v3", "base", "tiny"]
345
  choices = [m for m in base_choices if m in AVAILABLE_MODEL_SET]
346
  if not choices:
347
+ choices = base_choices
 
348
  if prefer_default in choices:
349
  default = prefer_default
350
  else:
 
367
 
368
  # ---------- SRT export ----------
369
  def segments_to_srt(segments):
 
 
 
 
370
  def fmt_time(t):
 
371
  h = int(t // 3600)
372
  m = int((t % 3600) // 60)
373
  s = int(t % 60)
 
382
  lines.append(str(i))
383
  lines.append(f"{fmt_time(start)} --> {fmt_time(end)}")
384
  lines.append(text)
385
+ lines.append("")
386
  return "\n".join(lines)
387
 
388
 
389
  # ---------- ZIP extraction + mapping for UI ----------
390
  def extract_zip_and_map(zip_path, zip_password=None):
 
 
 
 
391
  global EXTRACT_MAP
392
  EXTRACT_MAP = {}
393
  temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
 
424
  fullp = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
425
  if not os.path.exists(fullp):
426
  continue
 
427
  base = os.path.basename(info.filename)
 
428
  key = base
429
  if key in EXTRACT_MAP:
 
430
  idx = count.get(base, 1) + 1
431
  count[base] = idx
432
  name_only, extn = os.path.splitext(base)
 
438
  if not EXTRACT_MAP:
439
  logs.append("No supported audio files found in ZIP.")
440
  return [], "\n".join(logs)
 
441
  friendly = sorted(EXTRACT_MAP.keys())
442
  return friendly, "\n".join(logs)
443
  except Exception as e:
 
451
  try:
452
  if not path:
453
  return None, "", "No file provided."
 
454
  p = path.name if hasattr(path, "name") else str(path)
455
  device = None if device_choice == "auto" else device_choice
456
  model = get_whisper_model(model_name, device=device)
457
  logs.append(f"Loaded model: {model_name}")
458
  wav = convert_to_wav_if_needed(p)
459
  logs.append(f"Converted to WAV: {os.path.basename(wav)}")
 
460
  result = model.transcribe(wav)
461
  text = result.get("text", "").strip()
462
  if enable_memory:
 
465
  srt_path = None
466
  if generate_srt and result.get("segments"):
467
  srt_text = segments_to_srt(result["segments"])
 
468
  srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
469
  with open(srt_fp, "w", encoding="utf-8") as fh:
470
  fh.write(srt_text)
 
476
  logs.append("Memory updated.")
477
  except Exception:
478
  pass
 
479
  if wav and os.path.exists(wav) and wav != p:
480
  try:
481
  os.unlink(wav)
 
494
  srt_files = []
495
  out_doc = None
496
  paths = []
 
497
  if friendly_selected:
498
  for key in friendly_selected:
499
  p = EXTRACT_MAP.get(key)
 
501
  paths.append(p)
502
  else:
503
  logs.append(f"Warning: selected file not found in extract map: {key}")
 
504
  if uploaded_files:
505
  if isinstance(uploaded_files, (list, tuple)):
506
  for f in uploaded_files:
 
525
  logs.append(f"Merged transcript saved: {out_doc}")
526
  except Exception as e:
527
  logs.append(f"Merge failed: {e}")
 
528
  srt_return = srt_files[0] if srt_files else None
529
  return combined, "\n".join(logs), out_doc, srt_return
530
 
 
535
  available_choices, default_choice = safe_model_choices(prefer_default="small")
536
 
537
  CSS = """
538
+ :root{
539
+ --accent:#4f46e5;
540
+ --muted:#6b7280;
541
+ --card:#ffffff;
542
+ --bg:#f7f8fb;
543
+ --text:#0f172a;
544
+ --transcript-bg:#0f172a;
545
+ --transcript-color:#e6eef8;
546
+ }
547
+ [data-theme="dark"] {
548
+ --accent: #7c3aed;
549
+ --muted: #9ca3af;
550
+ --card: #0b1220;
551
+ --bg: #071022;
552
+ --text: #e6eef8;
553
+ --transcript-bg: #071026;
554
+ --transcript-color: #e6eef8;
555
+ }
556
+ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
557
  .header { padding: 14px; border-radius: 10px; background: linear-gradient(90deg, rgba(79,70,229,0.08), rgba(99,102,241,0.02)); margin-bottom: 12px; display:flex;align-items:center;gap:12px; }
558
  .app-icon { width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,var(--accent),#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px; }
559
  .card { background:var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(16,24,40,0.04); }
560
+ .transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background: var(--transcript-bg); color: var(--transcript-color); padding:12px; border-radius:8px; min-height:200px; }
561
  .small-note { color:var(--muted); font-size:12px;}
562
  """
563
 
564
+ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
565
+ # apply saved theme early
566
+ gr.HTML("""
567
+ <script>
568
+ (function() {
569
+ try {
570
+ const saved = localStorage.getItem('wt_theme');
571
+ if (saved) {
572
+ document.documentElement.setAttribute('data-theme', saved);
573
+ } else {
574
+ document.documentElement.setAttribute('data-theme', 'light');
575
+ }
576
+ } catch (e) {
577
+ console.warn('Theme init failed', e);
578
+ }
579
+ })();
580
+ </script>
581
+ """)
582
+
583
  with gr.Row(elem_classes="header"):
584
  with gr.Column(scale=0):
585
  gr.HTML("<div class='app-icon'>WT</div>")
586
  with gr.Column():
587
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
588
+ gr.Markdown("<div class='small-note'>Per-file selection after unzip, SRT export, model availability checks, dark/light toggle.</div>")
589
 
590
  with gr.Tabs():
591
  # Single Audio Tab
 
615
  return None, "", None, "No audio file provided."
616
  path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
617
  text, srt_path, logs = transcribe_single_file(path, model_name=model_name, device_choice=device, enable_memory=mem_on, generate_srt=srt_on)
 
618
  preview = audio_file
619
  return preview, text, srt_path, logs
620
 
 
652
  return [], "No ZIP provided."
653
  zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
654
  friendly, logs = extract_zip_and_map(zip_path, password)
 
655
  return friendly, logs
656
 
657
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
 
754
  mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
755
  mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
756
 
757
+ # Settings Tab (includes theme toggle)
758
  with gr.TabItem("Settings"):
759
  with gr.Row():
760
  with gr.Column():
 
765
  gr.Markdown("- Provide `fine_tune.py` if you plan to use the Fine-tune workflow.")
766
  with gr.Column():
767
  with gr.Group(elem_classes="card"):
768
+ gr.Markdown("### Theme")
769
+ theme_toggle = gr.Button("Toggle Dark / Light Theme")
770
+ theme_note = gr.Markdown("Theme preference is saved in your browser (localStorage).")
771
  gr.Markdown("### Diagnostics")
772
  diag_btn = gr.Button("Show memory summary")
773
  diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)
774
+ diag_btn.click(fn=_view_mem, inputs=[], outputs=[diag_out])
775
+
776
+ # client-side JS toggle (runs without Python)
777
+ theme_toggle.click(
778
+ None,
779
+ [],
780
+ [],
781
+ _js="""
782
+ () => {
783
+ try {
784
+ const root = document.documentElement;
785
+ const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
786
+ root.setAttribute('data-theme', cur);
787
+ localStorage.setItem('wt_theme', cur);
788
+ } catch (e) {
789
+ console.error('Theme toggle failed', e);
790
+ }
791
+ }
792
+ """
793
+ )
794
+
795
+ # ---------- Launch ----------
796
  if __name__ == "__main__":
797
  port = int(os.environ.get("PORT", 7860))
798
  print("DEBUG: launching improved Gradio on port", port, flush=True)