moderntranscribe

Sleeping

App Files Files Community

staraks committed on Dec 5, 2025

Commit

b577334

verified ·

1 Parent(s): 820bd4e

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -14

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # app.py
-# Whisper Transcriber — Gradio 3.x compatible complete file
 # Requirements: gradio (3.x), pydub, pyzipper, python-docx, ffmpeg, whisper or faster-whisper
 import os
@@ -59,8 +59,12 @@ FFMPEG_CANDIDATES = [
 MODEL_CACHE = {}
 EXTRACT_MAP = {}  # friendly_name -> path
 LAST_EXTRACT_DIR = None  # path to last extraction folder (for download)
 DEFAULT_ZIP_PASS = "dietcoke1"
 CPU_COUNT = max(1, multiprocessing.cpu_count())
 MAX_WORKERS = min(4, CPU_COUNT)  # tune for your environment
@@ -470,9 +474,10 @@ def extract_zip_and_map(zip_path, zip_password=None):
     and set LAST_EXTRACT_DIR to the extraction folder for download.
     Returns (friendly_list, logs_str)
     """
-    global EXTRACT_MAP, LAST_EXTRACT_DIR
     EXTRACT_MAP = {}
     LAST_EXTRACT_DIR = None
     run_id = uuid4().hex
     temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
     logs = []
@@ -525,6 +530,7 @@ def extract_zip_and_map(zip_path, zip_password=None):
             return [], "\n".join(logs)
         friendly = sorted(EXTRACT_MAP.keys())
         LAST_EXTRACT_DIR = temp_extract_dir
         return friendly, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
@@ -556,6 +562,76 @@ def download_extracted_folder():
     except Exception as e:
         return None, f"Failed to create ZIP: {e}"
 # ---------- Batch transcription generator (streaming) ----------
 def batch_transcribe_parallel_generator(
     friendly_selected,
@@ -570,6 +646,8 @@ def batch_transcribe_parallel_generator(
     refine_threshold=-1.0,
     zip_password=None,
 ):
     logs = []
     transcripts = []
     per_file_paths = []
@@ -626,6 +704,9 @@ def batch_transcribe_parallel_generator(
                 pct = int(5 + (completed / total) * 90)
                 yield "\n\n".join(logs), "\n\n".join(transcripts), None, pct
         combined = "\n\n".join(transcripts)
         out_doc = None
         if merge_flag:
@@ -856,6 +937,7 @@ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui,
 .card { background:var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(16,24,40,0.04); }
 .transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background: var(--transcript-bg); color: var(--transcript-color); padding:12px; border-radius:8px; min-height:200px; }
 .small-note { color:var(--muted); font-size:12px;}
 """
 with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as demo:
@@ -891,7 +973,8 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                     trans_single_btn = gr.Button("Transcribe")
                 with gr.Column(scale=1):
                     single_trans_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
-                    single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
             def _do_single(audio, model_name, device_name, mem_on, srt_on):
                 if not audio:
@@ -918,15 +1001,28 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                     batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
                     batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
                     batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
                     batch_preview_btn = gr.Button("Extract & List ZIP files")
-                    batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
                     batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
-                    # NEW: download extracted folder button + output file
                     batch_download_extracted_btn = gr.Button("Download extracted folder")
                     batch_extracted_zip = gr.File(label="Downloaded extracted ZIP")
                     batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
                     batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
-                    batch_merge = gr.Checkbox(label="Merge transcripts into DOCX", value=True)
                     batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
                     batch_srt = gr.Checkbox(label="Generate SRTs", value=False)
                     batch_use_two_pass = gr.Checkbox(label="Use two-pass refinement", value=False)
@@ -934,37 +1030,70 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                     batch_refine_thresh = gr.Number(value=-1.0, label="Refine threshold", precision=2)
                     batch_run_btn = gr.Button("Start Batch (parallel)")
                 with gr.Column(scale=1):
-                    batch_logs_out = gr.Textbox(label="Logs", lines=12, interactive=False)
                     batch_combined_out = gr.Textbox(label="Combined transcripts", lines=12, interactive=False)
                     batch_progress = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Progress (%)", interactive=False)
                     batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
                     batch_doc_download = gr.File(label="Download merged DOCX (if created)")
             def _preview_zip_and_populate(zip_file, password):
                 """
-                Extract the zip, populate EXTRACT_MAP and return updated CheckboxGroup choices + logs string.
                 """
                 if not zip_file:
                     return gr.update(choices=[]), "No ZIP provided."
                 path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
                 friendly, logs = extract_zip_and_map(path, password)
                 if friendly:
-                    # return an update for the CheckboxGroup (populates choices list) and a logs string
-                    return gr.update(choices=friendly), "\n".join(logs.splitlines())
                 return gr.update(choices=[]), logs
-            # wire preview/extract button to update the batch_select choices and preview textbox
             batch_preview_btn.click(fn=_preview_zip_and_populate, inputs=[batch_zip, batch_zip_pass], outputs=[batch_select, batch_preview_out])
             def _download_extracted_wrapper():
                 zip_path, msg = download_extracted_folder()
                 if zip_path:
                     return zip_path
-                # gr.File expects path or None; if failed, return None so nothing is downloadable
                 return None
             batch_download_extracted_btn.click(fn=_download_extracted_wrapper, inputs=[], outputs=[batch_extracted_zip])
             # wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
             def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
                 up = uploaded_files
@@ -998,10 +1127,12 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                 with gr.Column(scale=1):
                     mem_upload = gr.File(label="Upload memory files or ZIP (multiple)", file_count="multiple", type="filepath")
                     mem_preview_zip_btn = gr.Button("Preview ZIP members (for selected ZIPs)")
-                    mem_zip_preview_out = gr.Textbox(label="ZIP members (preview)", lines=6, interactive=False)
                     mem_zip_select = gr.CheckboxGroup(choices=[], label="Select ZIP members to import", interactive=True)
                     mem_import_btn = gr.Button("Import selected files / uploaded files")
-                    mem_status = gr.Textbox(label="Import status", lines=8, interactive=False)
                     mem_textbox = gr.Textbox(label="Add single word/phrase", placeholder="Type word or phrase")
                     mem_add_btn = gr.Button("Add to memory")
                     mem_clear_btn = gr.Button("Clear memory")
@@ -1012,6 +1143,8 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                         "- Supported encodings: utf-8, utf-16, latin-1, fallback.\n"
                         "- JSON format: {\"words\":{\"word\":count}, \"phrases\":{\"phrase\":count}}"
                     )
             def _preview_many_zip(uploaded):
                 if not uploaded:
@@ -1024,11 +1157,22 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as de
                         members, log = preview_zip_members_for_memory(str(f))
                         members_total.extend(members)
                 if members_total:
                     return "\n".join(members_total)
                 return "No ZIPs found or no previewable members."
             mem_preview_zip_btn.click(fn=_preview_many_zip, inputs=[mem_upload], outputs=[mem_zip_preview_out])
             def _import_mem(uploaded_files, selected_members):
                 try:
                     status = import_memory_files_multiple(uploaded_files, zip_members_to_import=selected_members)

 # app.py
+# Whisper Transcriber — Gradio 3.x compatible complete file with "merge last batch transcripts" feature
 # Requirements: gradio (3.x), pydub, pyzipper, python-docx, ffmpeg, whisper or faster-whisper
 import os
 MODEL_CACHE = {}
 EXTRACT_MAP = {}  # friendly_name -> path
 LAST_EXTRACT_DIR = None  # path to last extraction folder (for download)
+LAST_EXTRACT_LIST = []  # friendly names for last extraction (for select all)
 DEFAULT_ZIP_PASS = "dietcoke1"
+# NEW: last batch transcripts (set by batch generator). Each item: (friendly_name, txt_path, srt_path)
+LAST_BATCH_TRANSCRIPTS = []
 CPU_COUNT = max(1, multiprocessing.cpu_count())
 MAX_WORKERS = min(4, CPU_COUNT)  # tune for your environment
     and set LAST_EXTRACT_DIR to the extraction folder for download.
     Returns (friendly_list, logs_str)
     """
+    global EXTRACT_MAP, LAST_EXTRACT_DIR, LAST_EXTRACT_LIST
     EXTRACT_MAP = {}
     LAST_EXTRACT_DIR = None
+    LAST_EXTRACT_LIST = []
     run_id = uuid4().hex
     temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
     logs = []
             return [], "\n".join(logs)
         friendly = sorted(EXTRACT_MAP.keys())
         LAST_EXTRACT_DIR = temp_extract_dir
+        LAST_EXTRACT_LIST = friendly[:]
         return friendly, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
     except Exception as e:
         return None, f"Failed to create ZIP: {e}"
+# ---------- Merge uploaded text files into single Word file ----------
+def merge_text_files_to_docx(uploaded_text_files):
+    """
+    Accepts a list of uploaded text file paths (or single path), merges them in order into one .docx and returns path.
+    """
+    if not uploaded_text_files:
+        return None, "No files provided."
+    if isinstance(uploaded_text_files, (str, os.PathLike)):
+        uploaded_text_files = [str(uploaded_text_files)]
+    elif isinstance(uploaded_text_files, dict) and uploaded_text_files.get("name"):
+        uploaded_text_files = [uploaded_text_files["name"]]
+    elif isinstance(uploaded_text_files, (list, tuple)):
+        normalized = []
+        for f in uploaded_text_files:
+            if isinstance(f, (str, os.PathLike)):
+                normalized.append(str(f))
+            elif isinstance(f, dict) and f.get("name"):
+                normalized.append(f["name"])
+            elif hasattr(f, "name"):
+                normalized.append(f.name)
+        uploaded_text_files = normalized
+    combined = []
+    for p in uploaded_text_files:
+        if not os.path.exists(p):
+            continue
+        try:
+            with open(p, "r", encoding="utf-8") as fh:
+                txt = fh.read()
+        except Exception:
+            with open(p, "r", encoding="latin-1", errors="replace") as fh:
+                txt = fh.read()
+        header = f"\n\n--- {os.path.basename(p)} ---\n\n"
+        combined.append(header + txt)
+    if not combined:
+        return None, "No readable text files."
+    merged_text = "\n".join(combined)
+    out_path = save_as_word(merged_text)
+    return out_path, "Merged"
+# ---------- NEW: merge last batch transcripts ----------
+def merge_last_batch_transcripts():
+    """
+    Merge txt transcripts created by the last batch run (LAST_BATCH_TRANSCRIPTS) into a single .docx.
+    Returns (path_or_None, message)
+    """
+    global LAST_BATCH_TRANSCRIPTS
+    if not LAST_BATCH_TRANSCRIPTS:
+        return None, "No last-batch transcripts available."
+    combined = []
+    for fname, txtp, srtp in LAST_BATCH_TRANSCRIPTS:
+        if not txtp or not os.path.exists(txtp):
+            continue
+        try:
+            with open(txtp, "r", encoding="utf-8", errors="replace") as fh:
+                txt = fh.read()
+        except Exception:
+            try:
+                with open(txtp, "r", encoding="latin-1", errors="replace") as fh:
+                    txt = fh.read()
+            except Exception:
+                txt = ""
+        header = f"\n\n--- {fname} ---\n\n"
+        combined.append(header + txt)
+    if not combined:
+        return None, "No readable last-batch transcript files found."
+    merged_text = "\n".join(combined)
+    out_path = save_as_word(merged_text)
+    return out_path, f"Merged {len(combined)} files."
 # ---------- Batch transcription generator (streaming) ----------
 def batch_transcribe_parallel_generator(
     friendly_selected,
     refine_threshold=-1.0,
     zip_password=None,
 ):
+    global LAST_BATCH_TRANSCRIPTS
+    LAST_BATCH_TRANSCRIPTS = []  # reset at start
     logs = []
     transcripts = []
     per_file_paths = []
                 pct = int(5 + (completed / total) * 90)
                 yield "\n\n".join(logs), "\n\n".join(transcripts), None, pct
+        # Save per-file transcript list into global for later merging/downloading
+        LAST_BATCH_TRANSCRIPTS = per_file_paths[:]
         combined = "\n\n".join(transcripts)
         out_doc = None
         if merge_flag:
 .card { background:var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(16,24,40,0.04); }
 .transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background: var(--transcript-bg); color: var(--transcript-color); padding:12px; border-radius:8px; min-height:200px; }
 .small-note { color:var(--muted); font-size:12px;}
+.btn-row { display:flex; gap:8px; margin-top:8px; }
 """
 with gr.Blocks(title="Whisper Transcriber — Parallel + Memory", css=CSS) as demo:
                     trans_single_btn = gr.Button("Transcribe")
                 with gr.Column(scale=1):
                     single_trans_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
+            # LOGS at bottom
+            single_logs = gr.Textbox(label="Logs", lines=6, interactive=False)
             def _do_single(audio, model_name, device_name, mem_on, srt_on):
                 if not audio:
                     batch_files = gr.File(label="Upload audio files", file_count="multiple", type="filepath")
                     batch_zip = gr.File(label="Or upload ZIP (optional)", file_count="single", type="filepath")
                     batch_zip_pass = gr.Textbox(label="ZIP password (if any)", value=DEFAULT_ZIP_PASS)
+                    # Extract and populate list
                     batch_preview_btn = gr.Button("Extract & List ZIP files")
+                    batch_preview_out = gr.Textbox(label="ZIP members (preview)", lines=4, interactive=False)
                     batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files to include", interactive=True)
+                    # select-all / clear buttons
+                    batch_select_all_btn = gr.Button("Select All extracted")
+                    batch_clear_select_btn = gr.Button("Clear selection")
+                    # Download extracted and merge uploaded texts
                     batch_download_extracted_btn = gr.Button("Download extracted folder")
                     batch_extracted_zip = gr.File(label="Downloaded extracted ZIP")
+                    gr.Markdown("### Merge text files")
+                    merge_text_files_input = gr.File(label="Upload text files to merge (.txt/.srt/.json)", file_count="multiple", type="filepath")
+                    merge_text_btn = gr.Button("Merge uploaded text files -> DOCX")
+                    merge_text_out = gr.File(label="Merged DOCX download")
+                    # NEW: Merge last batch transcripts
+                    merge_last_batch_btn = gr.Button("Merge Last Batch Transcripts")
+                    merge_last_batch_status = gr.Textbox(label="Last-batch merge status", lines=3, interactive=False)
+                    merge_last_batch_download = gr.File(label="Merged last-batch DOCX")
+                    # Transcription parameters
                     batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
                     batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
+                    batch_merge = gr.Checkbox(label="Merge transcripts into DOCX after run", value=True)
                     batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
                     batch_srt = gr.Checkbox(label="Generate SRTs", value=False)
                     batch_use_two_pass = gr.Checkbox(label="Use two-pass refinement", value=False)
                     batch_refine_thresh = gr.Number(value=-1.0, label="Refine threshold", precision=2)
                     batch_run_btn = gr.Button("Start Batch (parallel)")
                 with gr.Column(scale=1):
                     batch_combined_out = gr.Textbox(label="Combined transcripts", lines=12, interactive=False)
                     batch_progress = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Progress (%)", interactive=False)
                     batch_zip_download = gr.File(label="Download per-file transcripts ZIP")
                     batch_doc_download = gr.File(label="Download merged DOCX (if created)")
+            # Logs at bottom
+            batch_logs_out = gr.Textbox(label="Logs", lines=8, interactive=False)
             def _preview_zip_and_populate(zip_file, password):
                 """
+                Extract the zip, populate EXTRACT_MAP and return updated CheckboxGroup choices + preview text.
+                Returns a (CheckboxGroup update, text) pair. When files were extracted, the
+                choices are the friendly names and the text lists them one per line.
+                Otherwise the choices are emptied and the text is the extraction log, or
+                "No ZIP provided." when no file was given.
                 """
                 if not zip_file:
                     return gr.update(choices=[]), "No ZIP provided."
                 path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
                 friendly, logs = extract_zip_and_map(path, password)
                 if friendly:
+                    return gr.update(choices=friendly), "\n".join(friendly)
                 return gr.update(choices=[]), logs
             batch_preview_btn.click(fn=_preview_zip_and_populate, inputs=[batch_zip, batch_zip_pass], outputs=[batch_select, batch_preview_out])
+            def _select_all_batch():
+                # uses LAST_EXTRACT_LIST set by extract
+                global LAST_EXTRACT_LIST
+                if LAST_EXTRACT_LIST:
+                    return gr.update(value=LAST_EXTRACT_LIST)
+                return gr.update(value=[])
+            batch_select_all_btn.click(fn=_select_all_batch, inputs=[], outputs=[batch_select])
+            def _clear_batch_select():
+                return gr.update(value=[])
+            batch_clear_select_btn.click(fn=_clear_batch_select, inputs=[], outputs=[batch_select])
             def _download_extracted_wrapper():
+                # Adapter for the single gr.File output: keep only the path returned by
+                # download_extracted_folder() and discard its message.
                 zip_path, msg = download_extracted_folder()
                 if zip_path:
                     return zip_path
+                # No archive path came back, so hand the File component nothing to offer.
                 return None
             batch_download_extracted_btn.click(fn=_download_extracted_wrapper, inputs=[], outputs=[batch_extracted_zip])
+            def _merge_texts(uploaded_texts):
+                if not uploaded_texts:
+                    return None, "No files provided."
+                out_path, msg = merge_text_files_to_docx(uploaded_texts)
+                if out_path:
+                    return out_path
+                return None, msg
+            merge_text_btn.click(fn=_merge_texts, inputs=[merge_text_files_input], outputs=[merge_text_out])
+            def _merge_last_batch_action():
+                """
+                Merge last batch transcripts (global LAST_BATCH_TRANSCRIPTS) into docx and return file path.
+                """
+                path, msg = merge_last_batch_transcripts()
+                if path:
+                    return path, msg
+                return None, msg
+            merge_last_batch_btn.click(fn=_merge_last_batch_action, inputs=[], outputs=[merge_last_batch_download, merge_last_batch_status])
             # wrapper generator — Gradio expects the function itself to be a generator that yields streaming tuples
             def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
                 up = uploaded_files
                 with gr.Column(scale=1):
                     mem_upload = gr.File(label="Upload memory files or ZIP (multiple)", file_count="multiple", type="filepath")
                     mem_preview_zip_btn = gr.Button("Preview ZIP members (for selected ZIPs)")
+                    mem_zip_preview_out = gr.Textbox(label="ZIP members (preview)", lines=4, interactive=False)
                     mem_zip_select = gr.CheckboxGroup(choices=[], label="Select ZIP members to import", interactive=True)
+                    mem_select_all_btn = gr.Button("Select All members")
+                    mem_clear_select_btn = gr.Button("Clear selection")
                     mem_import_btn = gr.Button("Import selected files / uploaded files")
+                    mem_status = gr.Textbox(label="Import status", lines=6, interactive=False)
                     mem_textbox = gr.Textbox(label="Add single word/phrase", placeholder="Type word or phrase")
                     mem_add_btn = gr.Button("Add to memory")
                     mem_clear_btn = gr.Button("Clear memory")
                         "- Supported encodings: utf-8, utf-16, latin-1, fallback.\n"
                         "- JSON format: {\"words\":{\"word\":count}, \"phrases\":{\"phrase\":count}}"
                     )
+            # Logs at bottom
+            mem_logs = gr.Textbox(label="Logs", lines=6, interactive=False)
             def _preview_many_zip(uploaded):
                 if not uploaded:
                         members, log = preview_zip_members_for_memory(str(f))
                         members_total.extend(members)
                 if members_total:
+                    # set mem_zip_select choices via update
                     return "\n".join(members_total)
                 return "No ZIPs found or no previewable members."
             mem_preview_zip_btn.click(fn=_preview_many_zip, inputs=[mem_upload], outputs=[mem_zip_preview_out])
+            def _select_all_mem():
+                # try to use preview box content (not ideal) — but we stored last extract list globally as LAST_EXTRACT_LIST
+                global LAST_EXTRACT_LIST
+                if LAST_EXTRACT_LIST:
+                    return gr.update(value=LAST_EXTRACT_LIST)
+                return gr.update(value=[])
+            mem_select_all_btn.click(fn=_select_all_mem, inputs=[], outputs=[mem_zip_select])
+            mem_clear_select_btn.click(fn=_clear_batch_select, inputs=[], outputs=[mem_zip_select])
             def _import_mem(uploaded_files, selected_members):
                 try:
                     status = import_memory_files_multiple(uploaded_files, zip_members_to_import=selected_members)