Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks commited on Nov 21, 2025

Commit

de86414

verified ·

1 Parent(s): fc6402d

Update app.py

Browse files

Files changed (1) hide show

app.py +379 -187

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # app.py
-# Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
-# Multi-tab UI, zip extraction + selectable files, memory management
 import os
 import sys
@@ -12,13 +13,14 @@ import traceback
 import threading
 import re
 from difflib import get_close_matches
-# Force unbuffered output
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
-# Third-party imports
 try:
     from docx import Document
     import whisper
@@ -43,9 +45,15 @@ FFMPEG_CANDIDATES = [
     ("pcm_s16le", 44100, 2),
     ("mulaw", 8000, 1),
 ]
 # ----------------------------
-# ---------- Memory helpers ----------
 def load_memory():
     try:
         if os.path.exists(MEMORY_FILE):
@@ -77,9 +85,8 @@ def save_memory(mem):
 memory = load_memory()
-print("DEBUG: memory loaded (words=%d phrases=%d)" % (len(memory.get("words", {})), len(memory.get("phrases", {}))), flush=True)
-# ---------- Postprocessing (same as before) ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
     "dx": "diagnosis",
@@ -160,7 +167,6 @@ def postprocess_transcript(text, format_soap=False):
     return t
-# ---------- Memory utilities (same as before) ----------
 def extract_words_and_phrases(text):
     words = re.findall(r"[A-Za-z0-9\-']+", text)
     sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
@@ -228,7 +234,7 @@ def memory_correct_text(text, min_ratio=0.85):
     return corrected
-# ---------- Memory management UI helpers (same as before) ----------
 def import_memory_file(uploaded):
     global memory
     if not uploaded:
@@ -340,7 +346,7 @@ def save_as_word(text, filename=None):
     return filename
-# ---------- improved ffmpeg convert ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
@@ -465,15 +471,11 @@ def get_whisper_model(name, device=None):
     return MODEL_CACHE[name]
-# ---------- ZIP extraction + selection helpers ----------
 def extract_zip_list(zip_file, zip_password):
-    """
-    Extract zip to a temp dir and return (list_of_paths, diagnostics_text)
-    """
     temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
     try:
         if os.path.exists(temp_extract_dir):
-            # clear existing
             try:
                 shutil.rmtree(temp_extract_dir)
             except Exception:
@@ -511,14 +513,13 @@ def extract_zip_list(zip_file, zip_password):
         if not extracted:
             logs.append("No supported audio files found in zip.")
             return [], "\n".join(logs)
-        # Return list and logs
         return extracted, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
         return [], f"Extraction failed: {e}"
-# ---------- Main transcription generator (updated to accept explicit 'selected_paths') ----------
 def transcribe_multiple(
     selected_paths,
     model_name,
@@ -527,10 +528,6 @@ def transcribe_multiple(
     enable_memory=False,
     device=None,
 ):
-    """
-    Generator yields (log_text, transcripts_text, merged_file_path_or_None, percent_int)
-    selected_paths: list of absolute file paths to process
-    """
     log = []
     transcripts = []
     word_file_path = None
@@ -542,7 +539,6 @@ def transcribe_multiple(
     yield "", "", None, 0
-    # load model
     yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
     try:
         model = get_whisper_model(model_name, device=device)
@@ -554,16 +550,16 @@ def transcribe_multiple(
     total = len(selected_paths)
     for idx, p in enumerate(selected_paths, start=1):
-        log.append(f"Processing file ({idx}/{total}): {p}")
         yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx - 1) * 80 / max(1, total))
         wav = None
         try:
             wav = convert_to_wav_if_needed(p)
-            log.append(f"Converted to WAV: {wav}")
         except Exception as e:
-            log.append(f"Conversion failed for {p}: {e}")
-            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}")
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
             continue
@@ -579,7 +575,7 @@ def transcribe_multiple(
             if enable_memory:
                 text = memory_correct_text(text)
             text = postprocess_transcript(text)
-            transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
             if enable_memory:
                 try:
@@ -590,8 +586,8 @@ def transcribe_multiple(
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
         except Exception as e:
-            log.append(f"Transcription failed for {p}: {e}")
-            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}")
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
             continue
         finally:
@@ -600,11 +596,11 @@ def transcribe_multiple(
                     tmpdir = tempfile.gettempdir()
                     try:
                         common = os.path.commonpath([os.path.abspath(tmpdir), os.path.abspath(wav)])
-                        if common == os.path.abspath(tmpdir) and not p.lower().endswith(".wav"):
                             os.unlink(wav)
                     except Exception:
                         try:
-                            if tmpdir in os.path.abspath(wav) and not p.lower().endswith(".wav"):
                                 os.unlink(wav)
                         except Exception:
                             pass
@@ -623,177 +619,330 @@ def transcribe_multiple(
     yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
-# ----------------------- Gradio callbacks & UI -----------------------
-def extract_zip_for_ui(zip_file, zip_password, use_default_zip_pass, default_zip_password):
-    """
-    Extract zip and return (checkbox_choices, logs)
-    """
-    if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
-        final_zip_password = default_zip_password
-    else:
-        final_zip_password = zip_password
-    if not zip_file:
-        return [], "No ZIP file provided."
-    # Normalize zip path
-    zip_path = None
-    if isinstance(zip_file, (str, os.PathLike)):
-        zip_path = str(zip_file)
-    elif hasattr(zip_file, "name"):
-        zip_path = zip_file.name
-    elif isinstance(zip_file, dict) and zip_file.get("name"):
-        zip_path = zip_file["name"]
-    else:
-        return [], "Unable to determine uploaded zip path."
-    extracted, logs = extract_zip_list(zip_path, final_zip_password)
-    # For the UI we show readable labels but the choices list will hold full paths
-    choices = extracted  # list of paths (strings)
-    return choices, logs or "Extraction completed."
-def run_transcription_ui(selected_files, file_input, model_name, merge, zip_selected_files, zip_file, zip_password, use_default_zip_pass, default_zip_password, enable_memory, device_choice):
-    """
-    Top-level UI handler invoked by the Transcribe button.
-    Priority:
-      1) zip_selected_files: explicit selection of extracted files (checkbox group)
-      2) selected_files from file_input (file input paths)
-      3) zip_file without explicit selection -> extract all then transcribe
-    This function returns a Gradio generator (yields) using transcribe_multiple.
-    """
-    # build final list of files to process
-    final_paths = []
-    # If the user selected extracted zip files (zip_selected_files is list of paths), use those
-    if zip_selected_files:
-        final_paths = zip_selected_files if isinstance(zip_selected_files, (list, tuple)) else [zip_selected_files]
     else:
-        # if file_input provided (list of paths), use them
-        if file_input:
-            if isinstance(file_input, (list, tuple)):
-                for a in file_input:
-                    if a:
-                        # file_input uses type="filepath" so entries are paths
-                        final_paths.append(str(a))
-            elif isinstance(file_input, str):
-                final_paths.append(file_input)
-        # if nothing chosen and zip_file provided, auto-extract all and use them
-        if not final_paths and zip_file:
-            # reuse extract logic
-            if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
-                final_zip_password = default_zip_password
-            else:
-                final_zip_password = zip_password
-            zip_path = None
-            if isinstance(zip_file, (str, os.PathLike)):
-                zip_path = str(zip_file)
-            elif hasattr(zip_file, "name"):
-                zip_path = zip_file.name
-            elif isinstance(zip_file, dict) and zip_file.get("name"):
-                zip_path = zip_file["name"]
-            if zip_path:
-                extracted, logs = extract_zip_list(zip_path, final_zip_password)
-                final_paths = extracted
-    # call core generator
-    adv = {}
-    device = None
-    if device_choice and device_choice != "auto":
-        device = device_choice  # 'cpu' or 'cuda'
     try:
-        for logs_text, transcripts_text, word_path, percent in transcribe_multiple(
-            final_paths,
-            model_name,
-            adv,
-            merge_checkbox=merge,
-            enable_memory=enable_memory,
-            device=device,
-        ):
-            yield logs_text, transcripts_text, word_path, percent
-    except Exception:
-        tb = traceback.format_exc()
-        logs_text = f"EXCEPTION in run_transcription_ui:\n{tb}"
-        transcripts_text = "ERROR: transcription did not start or failed unexpectedly."
-        yield logs_text, transcripts_text, None, 100
-# Build UI (Tabs)
 print("DEBUG: building Gradio Blocks", flush=True)
-with gr.Blocks(title="Whisper Transcriber — Multi-tab") as demo:
-    gr.Markdown(
-        "<h2>Whisper Transcriber</h2>"
-        "<p>Upload audio files or a ZIP, extract and choose files, then transcribe.</p>",
-    )
     with gr.Tabs():
-        # ---------------- Transcribe Tab ----------------
-        with gr.TabItem("Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    gr.Markdown("### Inputs")
-                    file_input = gr.File(label="Audio files (optional)", file_count="multiple", type="filepath", height=80)
-                    zip_input = gr.File(label="ZIP with audio (optional)", file_count="single", type="filepath", height=80)
                     with gr.Row():
-                        zip_password = gr.Textbox(label="ZIP password (override)", placeholder="Optional")
-                        use_default_zip_pass = gr.Checkbox(label="Use default ZIP password", value=False)
-                        default_zip_password = gr.Textbox(label="Default ZIP password", value="", interactive=True)
-                    model_select = gr.Dropdown(choices=["small", "medium", "large", "base"], value="small", label="Whisper model")
-                    device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device (auto tries default)")
-                    merge_checkbox = gr.Checkbox(label="Merge all transcripts into one .docx", value=True)
-                    memory_checkbox = gr.Checkbox(label="Enable correction memory", value=False)
-                    gr.Markdown("### ZIP extraction & file selection")
-                    extract_btn = gr.Button("Extract ZIP & List Files")
-                    extracted_files_check = gr.CheckboxGroup(choices=[], label="Select extracted files to transcribe (optional)", interactive=True)
-                    extract_logs = gr.Textbox(label="Extraction logs", interactive=False, lines=6)
-                    # action buttons
-                    transcribe_btn = gr.Button("Transcribe Selected / Uploaded")
                 with gr.Column(scale=1):
                     gr.Markdown("### Output")
-                    transcripts_out = gr.Textbox(label="Transcript", lines=20, interactive=False)
-                    progress_num = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Progress (%)", interactive=False)
-                    download_file = gr.File(label="Merged .docx (when available)")
-                    logs = gr.Textbox(label="Logs", lines=12, interactive=False)
-            # Wire extract button
-            def _extract_click(zip_file, zip_password, use_default_zip_pass, default_zip_password):
-                choices, logstxt = extract_zip_for_ui(zip_file, zip_password, use_default_zip_pass, default_zip_password)
-                # choices are paths; show them in CheckboxGroup
-                return choices, logstxt
-            extract_btn.click(fn=_extract_click, inputs=[zip_input, zip_password, use_default_zip_pass, default_zip_password], outputs=[extracted_files_check, extract_logs])
-            # Wire transcribe button: need to pass selected extracted files (list), file_input, model, merge, zip file (for fallback), etc.
-            transcribe_btn.click(
-                fn=run_transcription_ui,
                 inputs=[
-                    extracted_files_check,  # zip_selected_files
-                    file_input,             # file_input
-                    model_select,
-                    merge_checkbox,
-                    # pass in zip file so fallback is possible
-                    extracted_files_check,  # placeholder to keep ordering (not used) - we will also pass zip_input below
-                    zip_input,
-                    zip_password,
-                    use_default_zip_pass,
-                    default_zip_password,
-                    memory_checkbox,
-                    device_choice,
                 ],
-                outputs=[logs, transcripts_out, download_file, progress_num],
             )
-        # ---------------- Memory Tab ----------------
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    gr.Markdown("### Memory Tools")
                     mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="filepath")
                     mem_import_btn = gr.Button("Import Memory File")
                     mem_manual_entry = gr.Textbox(label="Add word/phrase to memory (manual)", placeholder="Type a word or phrase")
@@ -802,7 +951,6 @@ with gr.Blocks(title="Whisper Transcriber — Multi-tab") as demo:
                     mem_view_btn = gr.Button("View Memory")
                     mem_status = gr.Textbox(label="Memory status", interactive=False, lines=12)
-            # memory bindings
             def _import_mem(uploaded):
                 return import_memory_file(uploaded)
@@ -811,22 +959,66 @@ with gr.Blocks(title="Whisper Transcriber — Multi-tab") as demo:
             mem_clear_btn.click(fn=lambda: clear_memory(), inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
-        # ---------------- Settings Tab ----------------
         with gr.TabItem("Settings"):
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("### Settings")
-                    gr.Markdown("- Use `Device` in Transcribe tab to force CPU/GPU. Default uses whisper's choice.")
-                    gr.Markdown("- `Default ZIP password` is empty by default for safety.")
-                    gr.Markdown("- If you want extracted-file preview before transcribing, click **Extract ZIP & List Files** first.")
                 with gr.Column():
                     gr.Markdown("### Diagnostics")
                     diag_btn = gr.Button("Show memory summary")
                     diag_out = gr.Textbox(label="Diagnostics output", interactive=False, lines=12)
                     diag_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[diag_out])
-    # end tabs
 # ---------- Launch ----------
 if __name__ == "__main__":

 # app.py
+# Whisper transcription app - Redesigned UI: Tabs for different works
+# Features: Audio Transcribe, Batch Transcribe (ZIP extraction + selection), Memory, Fine-tune, Settings
+# Drop-in replacement. Requires dependencies: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg.
 import os
 import sys
 import threading
 import re
 from difflib import get_close_matches
+from pathlib import Path
+# Force unbuffered output so container logs show prints immediately
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
+# Third-party imports (must be installed in the environment)
 try:
     from docx import Document
     import whisper
     ("pcm_s16le", 44100, 2),
     ("mulaw", 8000, 1),
 ]
+# Fine-tune globals
+FINETUNE_PROC = None
+FINETUNE_LOCK = threading.Lock()
+FINETUNE_LOG = os.path.join(tempfile.gettempdir(), "finetune_logs.txt")
+FINETUNE_WORKDIR = os.path.join(tempfile.gettempdir(), "finetune_workdir")
+os.makedirs(FINETUNE_WORKDIR, exist_ok=True)
 # ----------------------------
+# ---------- Utilities / Memory / Postprocessing ----------
 def load_memory():
     try:
         if os.path.exists(MEMORY_FILE):
 memory = load_memory()
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
     "dx": "diagnosis",
     return t
 def extract_words_and_phrases(text):
     words = re.findall(r"[A-Za-z0-9\-']+", text)
     sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
     return corrected
+# ---------- Memory management helpers ----------
 def import_memory_file(uploaded):
     global memory
     if not uploaded:
     return filename
+# ---------- Conversion helpers (pydub + ffmpeg fallback) ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
     return MODEL_CACHE[name]
+# ---------- ZIP extraction helpers ----------
 def extract_zip_list(zip_file, zip_password):
     temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
     try:
         if os.path.exists(temp_extract_dir):
             try:
                 shutil.rmtree(temp_extract_dir)
             except Exception:
         if not extracted:
             logs.append("No supported audio files found in zip.")
             return [], "\n".join(logs)
         return extracted, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
         return [], f"Extraction failed: {e}"
+# ---------- Transcription generator used by both Audio and Batch workflows ----------
 def transcribe_multiple(
     selected_paths,
     model_name,
     enable_memory=False,
     device=None,
 ):
     log = []
     transcripts = []
     word_file_path = None
     yield "", "", None, 0
     yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
     try:
         model = get_whisper_model(model_name, device=device)
     total = len(selected_paths)
     for idx, p in enumerate(selected_paths, start=1):
+        log.append(f"Processing file ({idx}/{total}): {os.path.basename(str(p))}")
         yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx - 1) * 80 / max(1, total))
         wav = None
         try:
             wav = convert_to_wav_if_needed(p)
+            log.append(f"Converted to WAV: {os.path.basename(str(wav))}")
         except Exception as e:
+            log.append(f"Conversion failed for {os.path.basename(str(p))}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(str(p))}\nERROR: Conversion failed: {e}")
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
             continue
             if enable_memory:
                 text = memory_correct_text(text)
             text = postprocess_transcript(text)
+            transcripts.append(f"FILE: {os.path.basename(str(p))}\n{text}\n")
             if enable_memory:
                 try:
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
         except Exception as e:
+            log.append(f"Transcription failed for {os.path.basename(str(p))}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(str(p))}\nERROR: Transcription failed: {e}")
             yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
             continue
         finally:
                     tmpdir = tempfile.gettempdir()
                     try:
                         common = os.path.commonpath([os.path.abspath(tmpdir), os.path.abspath(wav)])
+                        if common == os.path.abspath(tmpdir) and not str(p).lower().endswith(".wav"):
                             os.unlink(wav)
                     except Exception:
                         try:
+                            if tmpdir in os.path.abspath(wav) and not str(p).lower().endswith(".wav"):
                                 os.unlink(wav)
                         except Exception:
                             pass
     yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
+# ---------- Fine-tune helpers (same as earlier) ----------
+def _safe_write_log(msg):
+    try:
+        with open(FINETUNE_LOG, "a", encoding="utf-8") as fh:
+            fh.write(msg + "\n")
+    except Exception:
+        pass
+def prepare_finetune_dataset(uploaded_zip_or_dir):
+    dst = os.path.join(FINETUNE_WORKDIR, "data")
+    try:
+        if os.path.exists(dst):
+            shutil.rmtree(dst)
+        os.makedirs(dst, exist_ok=True)
+    except Exception as e:
+        return f"Failed to prepare workdir: {e}", ""
+    path = None
+    try:
+        if not uploaded_zip_or_dir:
+            return "No dataset file or dir provided.", ""
+        if isinstance(uploaded_zip_or_dir, (str, os.PathLike)):
+            path = str(uploaded_zip_or_dir)
+        elif hasattr(uploaded_zip_or_dir, "name"):
+            path = uploaded_zip_or_dir.name
+        elif isinstance(uploaded_zip_or_dir, dict) and uploaded_zip_or_dir.get("name"):
+            path = uploaded_zip_or_dir["name"]
+    except Exception as e:
+        return f"Unable to determine uploaded path: {e}", ""
+    if os.path.isfile(path) and path.lower().endswith(".zip"):
+        try:
+            with pyzipper.ZipFile(path, "r") as zf:
+                zf.extractall(dst)
+        except Exception as e:
+            return f"Failed to extract ZIP: {e}", ""
+    elif os.path.isdir(path):
+        try:
+            for item in os.listdir(path):
+                s = os.path.join(path, item)
+                d = os.path.join(dst, item)
+                if os.path.isdir(s):
+                    shutil.copytree(s, d)
+                else:
+                    shutil.copy2(s, d)
+        except Exception as e:
+            return f"Failed to copy dataset dir: {e}", ""
     else:
+        return "Uploaded file is not zip or directory.", ""
+    transcripts_candidates = [
+        os.path.join(dst, "transcripts.tsv"),
+        os.path.join(dst, "metadata.tsv"),
+        os.path.join(dst, "manifest.tsv"),
+        os.path.join(dst, "transcripts.txt"),
+        os.path.join(dst, "metadata.txt"),
+    ]
+    manifest_path = os.path.join(FINETUNE_WORKDIR, "manifest.tsv")
+    found = False
+    for tpath in transcripts_candidates:
+        if os.path.exists(tpath):
+            try:
+                shutil.copy2(tpath, manifest_path)
+                found = True
+                break
+            except Exception:
+                pass
+    if not found:
+        audio_files = []
+        for root, _, files in os.walk(dst):
+            for f in files:
+                if f.lower().endswith((".wav", ".mp3", ".flac", ".m4a", ".ogg")):
+                    audio_files.append(os.path.join(root, f))
+        if not audio_files:
+            return "No audio files found in dataset.", ""
+        entries = []
+        for a in audio_files:
+            base = os.path.splitext(a)[0]
+            t_candidate = base + ".txt"
+            transcript = ""
+            if os.path.exists(t_candidate):
+                try:
+                    with open(t_candidate, "r", encoding="utf-8") as fh:
+                        transcript = fh.read().strip().replace("\n", " ")
+                except Exception:
+                    transcript = ""
+            entries.append(f"{a}\t{transcript}")
+        try:
+            with open(manifest_path, "w", encoding="utf-8") as fh:
+                fh.write("\n".join(entries))
+            found = True
+        except Exception as e:
+            return f"Failed to write manifest: {e}", ""
+    if not found:
+        return "Failed to locate or build manifest.", ""
+    return f"Dataset prepared. Manifest: {manifest_path}", manifest_path
+def start_finetune(manifest_path, base_model, epochs, batch_size, lr, output_dir):
+    global FINETUNE_PROC
+    with FINETUNE_LOCK:
+        if FINETUNE_PROC and FINETUNE_PROC.poll() is None:
+            return "Fine-tune already running."
+        outdir = output_dir or os.path.join(FINETUNE_WORKDIR, "output")
+        os.makedirs(outdir, exist_ok=True)
+        try:
+            if os.path.exists(FINETUNE_LOG):
+                os.remove(FINETUNE_LOG)
+        except Exception:
+            pass
+        START_CMD = [
+            sys.executable,
+            "fine_tune.py",
+            "--manifest",
+            manifest_path,
+            "--base_model",
+            base_model,
+            "--epochs",
+            str(epochs),
+            "--batch_size",
+            str(batch_size),
+            "--lr",
+            str(lr),
+            "--output_dir",
+            outdir,
+        ]
+        try:
+            logfile = open(FINETUNE_LOG, "a", encoding="utf-8")
+            proc = subprocess.Popen(START_CMD, stdout=logfile, stderr=logfile, cwd=os.getcwd())
+            FINETUNE_PROC = proc
+            _safe_write_log(f"Started fine-tune: PID={proc.pid}, cmd={' '.join(START_CMD)}")
+            return f"Fine-tune started (PID={proc.pid}). Logs: {FINETUNE_LOG}"
+        except FileNotFoundError as e:
+            return f"Training script not found: {e}. Put your training script 'fine_tune.py' in project root or change START_CMD."
+        except Exception as e:
+            return f"Failed to start fine-tune: {e}"
+def stop_finetune():
+    global FINETUNE_PROC
+    with FINETUNE_LOCK:
+        if not FINETUNE_PROC:
+            return "No running fine-tune process."
+        try:
+            FINETUNE_PROC.terminate()
+            FINETUNE_PROC.wait(timeout=10)
+            pid = FINETUNE_PROC.pid
+            FINETUNE_PROC = None
+            _safe_write_log(f"Terminated fine-tune PID={pid}")
+            return f"Terminated fine-tune PID={pid}"
+        except Exception as e:
+            try:
+                FINETUNE_PROC.kill()
+            except Exception:
+                pass
+            FINETUNE_PROC = None
+            return f"Force killed fine-tune process: {e}"
+def tail_finetune_logs(lines=50):
     try:
+        if not os.path.exists(FINETUNE_LOG):
+            return "No logs yet."
+        with open(FINETUNE_LOG, "r", encoding="utf-8", errors="ignore") as fh:
+            all_lines = fh.read().splitlines()
+            last = all_lines[-lines:]
+            return "\n".join(last)
+    except Exception as e:
+        return f"Failed to read logs: {e}"
+# ----------------------- Gradio UI -----------------------
 print("DEBUG: building Gradio Blocks", flush=True)
+with gr.Blocks(title="Whisper Transcriber — Redesigned UI") as demo:
+    gr.Markdown("<h1 style='margin-bottom:0.25rem;'>Whisper Transcriber</h1>")
+    gr.Markdown("<p style='margin-top:0.1rem;color:#666;'>Organize work by tabs. Quick single-file transcription, batch workflows, memory and fine-tune tools.</p>")
     with gr.Tabs():
+        # ---------------- Audio Transcribe (single-file focused) ----------------
+        with gr.TabItem("Audio Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Quick audio transcription")
+                    single_audio = gr.Audio(label="Upload or record an audio file", type="filepath", interactive=True)
                     with gr.Row():
+                        audio_model = gr.Dropdown(choices=["small", "medium", "large", "base"], value="small", label="Model", interactive=True)
+                        audio_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device", interactive=True)
+                    audio_enable_memory = gr.Checkbox(label="Enable correction memory", value=False)
+                    audio_transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
+                    audio_help = gr.Markdown("<small>Upload a single audio file (wav/mp3/m4a/ogg). Use memory to apply learned corrections.</small>")
+                with gr.Column(scale=1):
+                    gr.Markdown("### Player & Transcript")
+                    audio_player_out = gr.Audio(label="Player", interactive=False)
+                    audio_transcript_out = gr.Textbox(label="Transcript", lines=16, interactive=False)
+                    audio_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
+            def _single_transcribe(audio_path, model_name, enable_memory, device_choice):
+                logs = []
+                transcripts = []
+                if not audio_path:
+                    return None, "No audio uploaded.", "No file provided."
+                # Normalize single path (gr.Audio returns path)
+                path = str(audio_path)
+                try:
+                    model = get_whisper_model(model_name, device=(None if device_choice == "auto" else device_choice))
+                    logs.append(f"Loaded model: {model_name}")
+                except Exception as e:
+                    tb = traceback.format_exc()
+                    return None, "", f"Failed to load model: {e}\n{tb}"
+                try:
+                    wav = convert_to_wav_if_needed(path)
+                    logs.append(f"Converted to WAV: {os.path.basename(wav)}")
+                except Exception as e:
+                    return None, "", f"Conversion failed: {e}"
+                try:
+                    result = model.transcribe(wav)
+                    text = result.get("text", "").strip()
+                    if enable_memory:
+                        text = memory_correct_text(text)
+                    text = postprocess_transcript(text)
+                    transcripts = text
+                    # update memory optionally
+                    if enable_memory:
+                        try:
+                            update_memory_with_transcript(text)
+                            logs.append("Memory updated.")
+                        except Exception:
+                            pass
+                except Exception as e:
+                    return None, "", f"Transcription failed: {e}"
+                finally:
+                    try:
+                        if wav and os.path.exists(wav) and wav != path:
+                            # remove tmp wav produced by conversion
+                            try:
+                                os.unlink(wav)
+                            except Exception:
+                                pass
+                    except Exception:
+                        pass
+                # audio_player_out accepts filepath
+                return path, transcripts, "\n".join(logs)
+            audio_transcribe_btn.click(fn=_single_transcribe, inputs=[single_audio, audio_model, audio_enable_memory, audio_device], outputs=[audio_player_out, audio_transcript_out, audio_logs])
+        # ---------------- Batch Transcribe ----------------
+        with gr.TabItem("Batch Transcribe"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### Batch / ZIP workflow")
+                    batch_files = gr.File(label="Upload multiple audio files (optional)", file_count="multiple", type="filepath")
+                    batch_zip = gr.File(label="Or upload ZIP with audio", file_count="single", type="filepath")
+                    with gr.Row():
+                        batch_zip_password = gr.Textbox(label="ZIP password (override)", placeholder="Optional")
+                        batch_use_default_zip_pass = gr.Checkbox(label="Use default ZIP password", value=False)
+                        batch_default_zip_password = gr.Textbox(label="Default ZIP password", value="", interactive=True)
+                    batch_model = gr.Dropdown(choices=["small", "medium", "large", "base"], value="small", label="Model")
+                    batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
+                    batch_merge = gr.Checkbox(label="Merge all transcripts into one .docx", value=True)
+                    batch_enable_memory = gr.Checkbox(label="Enable correction memory", value=False)
+                    gr.Markdown("### Extraction")
+                    batch_extract_btn = gr.Button("Extract ZIP & List Files")
+                    batch_extracted_check = gr.CheckboxGroup(choices=[], label="Select extracted files to transcribe (optional)", interactive=True)
+                    batch_extract_logs = gr.Textbox(label="Extraction logs", interactive=False, lines=6)
+                    batch_transcribe_btn = gr.Button("Transcribe Selected / Uploaded", variant="primary")
                 with gr.Column(scale=1):
                     gr.Markdown("### Output")
+                    batch_transcripts_out = gr.Textbox(label="Transcript (cumulative)", lines=20, interactive=False)
+                    batch_progress = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Progress (%)", interactive=False)
+                    batch_download_file = gr.File(label="Merged .docx (when available)")
+                    batch_logs = gr.Textbox(label="Logs", lines=12, interactive=False)
+            def _batch_extract(zip_file, zip_password, use_default_zip_pass, default_zip_password):
+                if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
+                    final_zip_password = default_zip_password
+                else:
+                    final_zip_password = zip_password
+                if not zip_file:
+                    return [], "No ZIP file provided."
+                zip_path = None
+                if isinstance(zip_file, (str, os.PathLike)):
+                    zip_path = str(zip_file)
+                elif hasattr(zip_file, "name"):
+                    zip_path = zip_file.name
+                else:
+                    return [], "Unable to determine uploaded zip path."
+                extracted, logs = extract_zip_list(zip_path, final_zip_password)
+                # For nicer UI, present basenames in the extract logs.
+                short_logs = logs + "\n\nFiles:\n" + "\n".join([os.path.basename(p) for p in extracted])
+                return extracted, short_logs
+            batch_extract_btn.click(fn=_batch_extract, inputs=[batch_zip, batch_zip_password, batch_use_default_zip_pass, batch_default_zip_password], outputs=[batch_extracted_check, batch_extract_logs])
+            batch_transcribe_btn.click(
+                fn=run_transcription_ui if 'run_transcription_ui' in globals() else None,
                 inputs=[
+                    batch_extracted_check,
+                    batch_files,
+                    batch_model,
+                    batch_merge,
+                    batch_extracted_check,
+                    batch_zip,
+                    batch_zip_password,
+                    batch_use_default_zip_pass,
+                    batch_default_zip_password,
+                    batch_enable_memory,
+                    batch_device,
                 ],
+                outputs=[batch_logs, batch_transcripts_out, batch_download_file, batch_progress],
             )
+        # ---------------- Memory ----------------
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Memory management")
                     mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="filepath")
                     mem_import_btn = gr.Button("Import Memory File")
                     mem_manual_entry = gr.Textbox(label="Add word/phrase to memory (manual)", placeholder="Type a word or phrase")
                     mem_view_btn = gr.Button("View Memory")
                     mem_status = gr.Textbox(label="Memory status", interactive=False, lines=12)
             def _import_mem(uploaded):
                 return import_memory_file(uploaded)
             mem_clear_btn.click(fn=lambda: clear_memory(), inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
+        # ---------------- Fine-tune ----------------
+        with gr.TabItem("Fine-tune"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### Prepare dataset & start fine-tuning")
+                    ft_upload = gr.File(label="Upload training ZIP or folder (zip)", file_count="single", type="filepath")
+                    ft_prepare_btn = gr.Button("Prepare dataset")
+                    ft_prepare_status = gr.Textbox(label="Prepare status / manifest", interactive=False, lines=4)
+                    gr.Markdown("### Training parameters")
+                    ft_base_model = gr.Dropdown(choices=["small", "base", "medium", "large"], value="small", label="Base model")
+                    ft_epochs = gr.Slider(minimum=1, maximum=100, value=3, step=1, label="Epochs")
+                    ft_batch = gr.Number(label="Batch size", value=8)
+                    ft_lr = gr.Number(label="Learning rate", value=1e-5, precision=8)
+                    ft_output_dir = gr.Textbox(label="Output dir (optional)", value="", placeholder="Leave blank to use temp output")
+                    ft_start_btn = gr.Button("Start Fine-tune")
+                    ft_stop_btn = gr.Button("Stop Fine-tune")
+                    ft_start_status = gr.Textbox(label="Start/Stop status", interactive=False, lines=4)
+                    ft_tail_btn = gr.Button("Tail training logs")
+                    ft_logs = gr.Textbox(label="Training logs (tail)", interactive=False, lines=12)
+                with gr.Column(scale=1):
+                    gr.Markdown("### Fine-tune notes")
+                    gr.Markdown(
+                        "- The app calls `python fine_tune.py --manifest <manifest> ...` by default; provide your training script or change START_CMD."
+                    )
+            def _prepare_action(ft_upload):
+                status, manifest = prepare_finetune_dataset(ft_upload)
+                return status
+            ft_prepare_btn.click(fn=_prepare_action, inputs=[ft_upload], outputs=[ft_prepare_status])
+            def _start_action(ft_prepare_status_txt, ft_base_model, ft_epochs, ft_batch, ft_lr, ft_output_dir):
+                manifest_guess = os.path.join(FINETUNE_WORKDIR, "manifest.tsv")
+                if not os.path.exists(manifest_guess):
+                    return "Manifest not found. Prepare dataset first or manually provide manifest."
+                status = start_finetune(manifest_guess, ft_base_model, int(ft_epochs), int(ft_batch), float(ft_lr), ft_output_dir)
+                return status
+            ft_start_btn.click(fn=_start_action, inputs=[ft_prepare_status, ft_base_model, ft_epochs, ft_batch, ft_lr, ft_output_dir], outputs=[ft_start_status])
+            ft_stop_btn.click(fn=lambda: stop_finetune(), inputs=[], outputs=[ft_start_status])
+            ft_tail_btn.click(fn=lambda: tail_finetune_logs(), inputs=[], outputs=[ft_logs])
+        # ---------------- Settings ----------------
         with gr.TabItem("Settings"):
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("### Runtime & Tips")
+                    gr.Markdown("- Device: choose CPU or CUDA in workflows. If CUDA isn't available, leave `auto` or `cpu`.")
+                    gr.Markdown("- Keep default ZIP password empty for safety.")
+                    gr.Markdown("- Extraction writes to system temp dir (extracted_audio). Re-extracting overwrites it.")
                 with gr.Column():
                     gr.Markdown("### Diagnostics")
                     diag_btn = gr.Button("Show memory summary")
                     diag_out = gr.Textbox(label="Diagnostics output", interactive=False, lines=12)
                     diag_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[diag_out])
+    # End tabs
 # ---------- Launch ----------
 if __name__ == "__main__":