Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,6 +58,7 @@ FFMPEG_CANDIDATES = [
|
|
| 58 |
]
|
| 59 |
MODEL_CACHE = {}
|
| 60 |
EXTRACT_MAP = {} # friendly_name -> path
|
|
|
|
| 61 |
DEFAULT_ZIP_PASS = "dietcoke1"
|
| 62 |
|
| 63 |
CPU_COUNT = max(1, multiprocessing.cpu_count())
|
|
@@ -464,8 +465,14 @@ def _worker_transcribe(args):
|
|
| 464 |
|
| 465 |
# ---------- ZIP extraction & mapping ----------
|
| 466 |
def extract_zip_and_map(zip_path, zip_password=None):
|
| 467 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
EXTRACT_MAP = {}
|
|
|
|
| 469 |
run_id = uuid4().hex
|
| 470 |
temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
|
| 471 |
logs = []
|
|
@@ -509,8 +516,15 @@ def extract_zip_and_map(zip_path, zip_password=None):
|
|
| 509 |
logs.append(f"Extracted: {info.filename} -> {key}")
|
| 510 |
if not EXTRACT_MAP:
|
| 511 |
logs.append("No supported audio files found in ZIP.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
return [], "\n".join(logs)
|
| 513 |
friendly = sorted(EXTRACT_MAP.keys())
|
|
|
|
| 514 |
return friendly, "\n".join(logs)
|
| 515 |
except Exception as e:
|
| 516 |
traceback.print_exc()
|
|
@@ -521,6 +535,27 @@ def extract_zip_and_map(zip_path, zip_password=None):
|
|
| 521 |
pass
|
| 522 |
return [], f"Extraction failed: {e}"
|
| 523 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
# ---------- Batch transcription generator (streaming) ----------
|
| 525 |
def batch_transcribe_parallel_generator(
|
| 526 |
friendly_selected,
|
|
@@ -883,9 +918,12 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
|
|
| 883 |
batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
|
| 884 |
batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
|
| 885 |
batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
|
| 886 |
-
batch_preview_btn = gr.Button("
|
| 887 |
batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
|
| 888 |
batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
|
|
|
|
|
|
|
|
|
|
| 889 |
batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
|
| 890 |
batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
|
| 891 |
batch_merge = gr.Checkbox(label="Merge transcripts into DOCX", value=True)
|
|
@@ -902,16 +940,30 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
|
|
| 902 |
batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
|
| 903 |
batch_doc_download = gr.File(label="Download merged DOCX (if created)")
|
| 904 |
|
| 905 |
-
def
|
|
|
|
|
|
|
|
|
|
| 906 |
if not zip_file:
|
| 907 |
-
return "No ZIP provided."
|
| 908 |
path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
|
| 909 |
-
|
| 910 |
-
if
|
| 911 |
-
return
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 915 |
|
| 916 |
# wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
|
| 917 |
def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
|
|
|
|
| 58 |
]
|
| 59 |
MODEL_CACHE = {}
|
| 60 |
EXTRACT_MAP = {} # friendly_name -> path
|
| 61 |
+
LAST_EXTRACT_DIR = None # path to last extraction folder (for download)
|
| 62 |
DEFAULT_ZIP_PASS = "dietcoke1"
|
| 63 |
|
| 64 |
CPU_COUNT = max(1, multiprocessing.cpu_count())
|
|
|
|
| 465 |
|
| 466 |
# ---------- ZIP extraction & mapping ----------
|
| 467 |
def extract_zip_and_map(zip_path, zip_password=None):
|
| 468 |
+
"""
|
| 469 |
+
Extract ZIP into a per-run temp dir, populate EXTRACT_MAP (friendly name -> file path),
|
| 470 |
+
and set LAST_EXTRACT_DIR to the extraction folder for download.
|
| 471 |
+
Returns (friendly_list, logs_str)
|
| 472 |
+
"""
|
| 473 |
+
global EXTRACT_MAP, LAST_EXTRACT_DIR
|
| 474 |
EXTRACT_MAP = {}
|
| 475 |
+
LAST_EXTRACT_DIR = None
|
| 476 |
run_id = uuid4().hex
|
| 477 |
temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
|
| 478 |
logs = []
|
|
|
|
| 516 |
logs.append(f"Extracted: {info.filename} -> {key}")
|
| 517 |
if not EXTRACT_MAP:
|
| 518 |
logs.append("No supported audio files found in ZIP.")
|
| 519 |
+
# cleanup temp dir if empty
|
| 520 |
+
try:
|
| 521 |
+
if os.path.exists(temp_extract_dir) and not os.listdir(temp_extract_dir):
|
| 522 |
+
shutil.rmtree(temp_extract_dir)
|
| 523 |
+
except Exception:
|
| 524 |
+
pass
|
| 525 |
return [], "\n".join(logs)
|
| 526 |
friendly = sorted(EXTRACT_MAP.keys())
|
| 527 |
+
LAST_EXTRACT_DIR = temp_extract_dir
|
| 528 |
return friendly, "\n".join(logs)
|
| 529 |
except Exception as e:
|
| 530 |
traceback.print_exc()
|
|
|
|
| 535 |
pass
|
| 536 |
return [], f"Extraction failed: {e}"
|
| 537 |
|
| 538 |
+
def download_extracted_folder():
    """
    Zip the most recent extraction folder and return the archive path for download.

    Reads the module-level LAST_EXTRACT_DIR, walks it recursively and writes every
    file into a fresh temporary ``.zip``, storing each entry under its path
    relative to that folder.

    Returns:
        tuple: ``(zip_path, "OK")`` on success, or ``(None, reason)`` when no
        extraction folder exists or the archive could not be created.
    """
    # Snapshot the global once: extract_zip_and_map() rebinds it, so re-reading it
    # between the existence check and the walk could observe a different folder.
    source_dir = LAST_EXTRACT_DIR
    if not source_dir or not os.path.exists(source_dir):
        return None, "No extracted folder available for download."

    zip_name = None
    try:
        # delete=False: the archive must outlive this call so the caller can use it.
        zip_tmp = tempfile.NamedTemporaryFile(suffix=".zip", delete=False)
        zip_name = zip_tmp.name
        zip_tmp.close()
        with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            # Walk and add files preserving relative path
            for root, _dirs, files in os.walk(source_dir):
                for f in files:
                    fullp = os.path.join(root, f)
                    zf.write(fullp, arcname=os.path.relpath(fullp, source_dir))
        return zip_name, "OK"
    except Exception as e:
        # Do not leave a partial archive behind in the temp directory.
        if zip_name is not None:
            try:
                os.remove(zip_name)
            except OSError:
                pass
        return None, f"Failed to create ZIP: {e}"
|
| 558 |
+
|
| 559 |
# ---------- Batch transcription generator (streaming) ----------
|
| 560 |
def batch_transcribe_parallel_generator(
|
| 561 |
friendly_selected,
|
|
|
|
| 918 |
batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
|
| 919 |
batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
|
| 920 |
batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
|
| 921 |
+
batch_preview_btn = gr.Button("Extract & List ZIP files")
|
| 922 |
batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
|
| 923 |
batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
|
| 924 |
+
# NEW: download extracted folder button + output file
|
| 925 |
+
batch_download_extracted_btn = gr.Button("Download extracted folder")
|
| 926 |
+
batch_extracted_zip = gr.File(label="Downloaded extracted ZIP")
|
| 927 |
batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
|
| 928 |
batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
|
| 929 |
batch_merge = gr.Checkbox(label="Merge transcripts into DOCX", value=True)
|
|
|
|
| 940 |
batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
|
| 941 |
batch_doc_download = gr.File(label="Download merged DOCX (if created)")
|
| 942 |
|
| 943 |
+
def _preview_zip_and_populate(zip_file, password):
    """
    Run ZIP extraction and build the values for the two preview outputs.

    Returns a pair: a CheckboxGroup update carrying the extracted friendly names
    (an empty list when nothing was provided or extracted) and the text to show
    in the preview box.
    """
    if not zip_file:
        return gr.update(choices=[]), "No ZIP provided."

    # accept either an object exposing .name or a plain path value
    if hasattr(zip_file, "name"):
        archive_path = zip_file.name
    else:
        archive_path = str(zip_file)

    names, log_text = extract_zip_and_map(archive_path, password)
    if not names:
        return gr.update(choices=[]), log_text

    # split and re-join on "\n" so the preview text uses uniform line breaks
    return gr.update(choices=names), "\n".join(log_text.splitlines())
|
| 955 |
+
|
| 956 |
+
# wire preview/extract button to update the batch_select choices and preview textbox
|
| 957 |
+
batch_preview_btn.click(fn=_preview_zip_and_populate, inputs=[batch_zip, batch_zip_pass], outputs=[batch_select, batch_preview_out])
|
| 958 |
+
|
| 959 |
+
def _download_extracted_wrapper():
    """
    Click handler: build the ZIP of the extracted folder and return its path.

    The status message from download_extracted_folder() is dropped; on failure
    None is returned so the File output offers nothing to download.
    """
    archive_path, _status = download_extracted_folder()
    # gr.File expects a path or None
    return archive_path if archive_path else None
|
| 965 |
+
|
| 966 |
+
batch_download_extracted_btn.click(fn=_download_extracted_wrapper, inputs=[], outputs=[batch_extracted_zip])
|
| 967 |
|
| 968 |
# wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
|
| 969 |
def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
|