staraks committed on
Commit
820bd4e
·
verified ·
1 Parent(s): adbc551

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -10
app.py CHANGED
@@ -58,6 +58,7 @@ FFMPEG_CANDIDATES = [
58
  ]
59
  MODEL_CACHE = {}
60
  EXTRACT_MAP = {} # friendly_name -> path
 
61
  DEFAULT_ZIP_PASS = "dietcoke1"
62
 
63
  CPU_COUNT = max(1, multiprocessing.cpu_count())
@@ -464,8 +465,14 @@ def _worker_transcribe(args):
464
 
465
  # ---------- ZIP extraction & mapping ----------
466
  def extract_zip_and_map(zip_path, zip_password=None):
467
- global EXTRACT_MAP
 
 
 
 
 
468
  EXTRACT_MAP = {}
 
469
  run_id = uuid4().hex
470
  temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
471
  logs = []
@@ -509,8 +516,15 @@ def extract_zip_and_map(zip_path, zip_password=None):
509
  logs.append(f"Extracted: {info.filename} -> {key}")
510
  if not EXTRACT_MAP:
511
  logs.append("No supported audio files found in ZIP.")
 
 
 
 
 
 
512
  return [], "\n".join(logs)
513
  friendly = sorted(EXTRACT_MAP.keys())
 
514
  return friendly, "\n".join(logs)
515
  except Exception as e:
516
  traceback.print_exc()
@@ -521,6 +535,27 @@ def extract_zip_and_map(zip_path, zip_password=None):
521
  pass
522
  return [], f"Extraction failed: {e}"
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  # ---------- Batch transcription generator (streaming) ----------
525
  def batch_transcribe_parallel_generator(
526
  friendly_selected,
@@ -883,9 +918,12 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
883
  batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
884
  batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
885
  batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
886
- batch_preview_btn = gr.Button("Preview ZIP members")
887
  batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
888
  batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
 
 
 
889
  batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
890
  batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
891
  batch_merge = gr.Checkbox(label="Merge transcripts into DOCX", value=True)
@@ -902,16 +940,30 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
902
  batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
903
  batch_doc_download = gr.File(label="Download merged DOCX (if created)")
904
 
905
- def _preview_zip(zip_file, password):
 
 
 
906
  if not zip_file:
907
- return "No ZIP provided."
908
  path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
909
- members, logs = extract_zip_and_map(path, password)
910
- if members:
911
- return "\n".join(members)
912
- return logs
913
-
914
- batch_preview_btn.click(fn=_preview_zip, inputs=[batch_zip, batch_zip_pass], outputs=[batch_preview_out])
 
 
 
 
 
 
 
 
 
 
 
915
 
916
  # wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
917
  def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
 
58
  ]
59
  MODEL_CACHE = {}
60
  EXTRACT_MAP = {} # friendly_name -> path
61
+ LAST_EXTRACT_DIR = None # path to last extraction folder (for download)
62
  DEFAULT_ZIP_PASS = "dietcoke1"
63
 
64
  CPU_COUNT = max(1, multiprocessing.cpu_count())
 
465
 
466
  # ---------- ZIP extraction & mapping ----------
467
  def extract_zip_and_map(zip_path, zip_password=None):
468
+ """
469
+ Extract ZIP into a per-run temp dir, populate EXTRACT_MAP (friendly name -> file path),
470
+ and set LAST_EXTRACT_DIR to the extraction folder for download.
471
+ Returns (friendly_list, logs_str)
472
+ """
473
+ global EXTRACT_MAP, LAST_EXTRACT_DIR
474
  EXTRACT_MAP = {}
475
+ LAST_EXTRACT_DIR = None
476
  run_id = uuid4().hex
477
  temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
478
  logs = []
 
516
  logs.append(f"Extracted: {info.filename} -> {key}")
517
  if not EXTRACT_MAP:
518
  logs.append("No supported audio files found in ZIP.")
519
+ # cleanup temp dir if empty
520
+ try:
521
+ if os.path.exists(temp_extract_dir) and not os.listdir(temp_extract_dir):
522
+ shutil.rmtree(temp_extract_dir)
523
+ except Exception:
524
+ pass
525
  return [], "\n".join(logs)
526
  friendly = sorted(EXTRACT_MAP.keys())
527
+ LAST_EXTRACT_DIR = temp_extract_dir
528
  return friendly, "\n".join(logs)
529
  except Exception as e:
530
  traceback.print_exc()
 
535
  pass
536
  return [], f"Extraction failed: {e}"
537
 
538
def download_extracted_folder():
    """
    Zip the most recent extraction folder so it can be offered for download.

    Reads the module-level LAST_EXTRACT_DIR; it is not modified here.

    Returns:
        (zip_path, "OK") on success, where zip_path is a newly created temporary
        .zip file containing every file under LAST_EXTRACT_DIR, stored under its
        path relative to that folder.
        (None, message) when no extraction folder is available or the archive
        could not be built.
    """
    source_dir = LAST_EXTRACT_DIR
    # isdir (not exists): a stray regular file at this path would otherwise
    # produce an empty archive that is reported as "OK".
    if not source_dir or not os.path.isdir(source_dir):
        return None, "No extracted folder available for download."

    zip_path = None
    try:
        # delete=False: the archive must outlive this function so the caller can serve it.
        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as zip_tmp:
            zip_path = zip_tmp.name
        with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            for root, dirs, files in os.walk(source_dir):
                # Sort in place so traversal (and member order) is deterministic.
                dirs.sort()
                for name in sorted(files):
                    full_path = os.path.join(root, name)
                    zf.write(full_path, arcname=os.path.relpath(full_path, source_dir))
        return zip_path, "OK"
    except Exception as e:
        # Do not leave a half-written archive behind in the temp directory.
        if zip_path is not None:
            try:
                os.remove(zip_path)
            except OSError:
                pass
        return None, f"Failed to create ZIP: {e}"
558
+
559
  # ---------- Batch transcription generator (streaming) ----------
560
  def batch_transcribe_parallel_generator(
561
  friendly_selected,
 
918
  batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
919
  batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
920
  batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
921
+ batch_preview_btn = gr.Button("Extract & List ZIP files")
922
  batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
923
  batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
924
+ # NEW: download extracted folder button + output file
925
+ batch_download_extracted_btn = gr.Button("Download extracted folder")
926
+ batch_extracted_zip = gr.File(label="Downloaded extracted ZIP")
927
  batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
928
  batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
929
  batch_merge = gr.Checkbox(label="Merge transcripts into DOCX", value=True)
 
940
  batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
941
  batch_doc_download = gr.File(label="Download merged DOCX (if created)")
942
 
943
def _preview_zip_and_populate(zip_file, password):
    """
    Run extraction on the uploaded ZIP and refresh the selection list.

    Returns a pair: a gr.update(...) carrying the new CheckboxGroup choices
    (empty when nothing was provided or nothing usable was extracted) and the
    text to show in the preview box.
    """
    if not zip_file:
        return gr.update(choices=[]), "No ZIP provided."

    # The upload may be an object exposing .name or something convertible to a path string.
    if hasattr(zip_file, "name"):
        archive_path = zip_file.name
    else:
        archive_path = str(zip_file)

    names, log_text = extract_zip_and_map(archive_path, password)
    if not names:
        return gr.update(choices=[]), log_text

    # Rejoin the log line by line, exactly as before, so line endings are "\n".
    normalized_log = "\n".join(log_text.splitlines())
    return gr.update(choices=names), normalized_log
955
+
956
+ # wire preview/extract button to update the batch_select choices and preview textbox
957
+ batch_preview_btn.click(fn=_preview_zip_and_populate, inputs=[batch_zip, batch_zip_pass], outputs=[batch_select, batch_preview_out])
958
+
959
def _download_extracted_wrapper():
    """
    Adapter between download_extracted_folder() and the file output component.

    Returns the archive path when one was produced, otherwise None so that
    nothing is offered for download. The status message is discarded.
    """
    archive_path, _status = download_extracted_folder()
    return archive_path if archive_path else None
965
+
966
+ batch_download_extracted_btn.click(fn=_download_extracted_wrapper, inputs=[], outputs=[batch_extracted_zip])
967
 
968
  # wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
969
  def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):