Spaces:

staraks
/

arvind

Sleeping

App Files Community

staraks committed on Nov 14, 2025

Commit

339fbba

verified ·

1 Parent(s): e166883

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -199

app.py CHANGED Viewed

@@ -1,151 +1,200 @@
 # Whisper Transcription Tool with .dct support and progress updates
-# Drop-in replacement for your app.py. Paste into your Hugging Face Space.
 from docx import Document
 import os
 import whisper
 import gradio as gr
 import pyzipper
-import glob
 import shutil
 import tempfile
 from pydub import AudioSegment
-# Load default model cache
 model_cache = {}
-def save_as_word(text, filename="merged_transcripts.docx"):
-    """Saves the given text as a Word document."""
     document = Document()
     document.add_paragraph(text)
     document.save(filename)
     return filename
-def convert_to_wav_if_needed(input_path):
     """
-    If the input file is not WAV, try to convert it to WAV using pydub/ffmpeg.
-    First attempts pydub (ffmpeg via AudioSegment). If that fails (common for obscure
-    .dct variants), it falls back to trying several ffmpeg raw-format heuristics
-    (different sample rates, signed/unsigned 8/16-bit) until one succeeds.
-    Returns path to WAV file (may be same as input if already WAV).
-    Raises an exception with ffmpeg stderr if no conversion worked.
     """
-    import subprocess
     lower = input_path.lower()
     if lower.endswith('.wav'):
         return input_path
-    # 1) Try pydub automatic detection first (simplest)
-    tmp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-    tmp_wav.close()
     try:
-        AudioSegment.from_file(input_path).export(tmp_wav.name, format='wav')
-        return tmp_wav.name
     except Exception as e_auto:
-        # Clean up the partial file
         try:
-            os.unlink(tmp_wav.name)
         except Exception:
             pass
-        # We'll try a set of ffmpeg heuristics below
-        ffmpeg_errors = []
-    # 2) Fallback: try various raw-format guesses with ffmpeg
-    guesses = [
-        # fmt, sample_rate, channels
-        ('s16le', 16000, 1),
-        ('s16le', 8000, 1),
-        ('u8', 8000, 1),
-        ('s16le', 44100, 1),
-        ('s16le', 16000, 2),
-        ('adpcm_ima_wav', 8000, 1),
-    ]
-    for fmt, sr, ch in guesses:
-        tmp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-        tmp_wav.close()
-        cmd = [
-            'ffmpeg', '-y', '-f', fmt, '-ar', str(sr), '-ac', str(ch), '-i', input_path,
-            tmp_wav.name
-        ]
-        try:
-            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
-        except Exception as e_run:
-            ffmpeg_errors.append(f"ffmpeg run failed for fmt={fmt},sr={sr},ch={ch}: {e_run}")
-            try:
-                os.unlink(tmp_wav.name)
-            except Exception:
-                pass
-            continue
-        if proc.returncode == 0 and os.path.exists(tmp_wav.name) and os.path.getsize(tmp_wav.name) > 100:
-            # success
-            return tmp_wav.name
-        else:
-            err = proc.stderr or proc.stdout or 'no ffmpeg output'
-            ffmpeg_errors.append(f"fmt={fmt},sr={sr},ch={ch} -> rc={proc.returncode} -> {err}")
-            try:
-                os.unlink(tmp_wav.name)
-            except Exception:
-                pass
-    # If we get here, nothing worked. Provide the accumulated errors.
-    msg = "Could not convert file to WAV. Tried pydub and several ffmpeg heuristics. Errors:
-" + "
-".join(ffmpeg_errors)
-    raise Exception(msg)
-def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
     """
-    Generator function for Gradio that yields progress updates.
-    Outputs: (log_text, transcripts_text, word_file_path_or_None, percent_int)
     """
-    # initial state
     log_outputs = []
     transcript_outputs_list = []
     word_file_path = None
     extracted_audio_paths = []
-    temp_extract_dir = "/tmp/extracted_audio"
-    # yield initial empty state (so UI shows up immediately)
     yield "", "", None, 0
-    # cleanup any previous temp dir
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
             log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
-        except OSError as e:
             log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
-    # If a zip is provided, extract supported audio files
     if zip_file:
         log_outputs.append(f"Processing zip file: {zip_file}")
         yield "\n\n".join(log_outputs), "", None, 2
         try:
             with pyzipper.ZipFile(zip_file, 'r') as zf:
                 if zip_password:
                     try:
                         zf.setpassword(zip_password.encode())
-                    except RuntimeError:
                         log_outputs.append("Error: Incorrect password for the zip file.")
                         yield "\n\n".join(log_outputs), "", None, 100
                         return
-                os.makedirs(temp_extract_dir, exist_ok=True)
                 audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
                 extracted_count = 0
                 for file_info in zf.infolist():
                     if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
                         try:
-                            # extract returns path relative to extract dir; build absolute path
                             zf.extract(file_info, path=temp_extract_dir)
                             extracted_path = os.path.join(temp_extract_dir, file_info.filename)
-                            # Ensure parent dirs exist (zip could contain folders)
                             extracted_path = os.path.normpath(extracted_path)
                             if os.path.exists(extracted_path):
                                 extracted_audio_paths.append(extracted_path)
@@ -156,7 +205,6 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
                 if extracted_count == 0:
                     log_outputs.append("No supported audio files found in the zip archive.")
-                    # cleanup empty dir
                     try:
                         shutil.rmtree(temp_extract_dir)
                         log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
@@ -171,7 +219,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
-                except OSError as e:
                     log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
             yield "\n\n".join(log_outputs), "", None, 100
             return
@@ -181,31 +229,29 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
-                except OSError as e:
-                    log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
             yield "\n\n".join(log_outputs), "", None, 100
             return
-    # Build list of input file paths (strings)
     all_audio_paths = []
-    if file_paths:
-        # file_paths from Gradio with type="filepath" come as list of paths
-        if isinstance(file_paths, (list, tuple)):
-            all_audio_paths.extend(file_paths)
         else:
-            all_audio_paths.append(file_paths)
     if extracted_audio_paths:
         all_audio_paths.extend(extracted_audio_paths)
     if not all_audio_paths:
         log_outputs.append("No audio files provided for transcription.")
-        # cleanup
         if os.path.exists(temp_extract_dir):
             try:
                 shutil.rmtree(temp_extract_dir)
                 log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
-            except OSError as e:
                 log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
         yield "\n\n".join(log_outputs), "", None, 100
         return
@@ -213,7 +259,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
     total_files = len(all_audio_paths)
     processed = 0
-    # Load model once (cache)
     if model_name not in model_cache:
         log_outputs.append(f"Loading model: {model_name}")
         yield "\n\n".join(log_outputs), "", None, 3
@@ -221,26 +267,24 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
             model_cache[model_name] = whisper.load_model(model_name)
         except Exception as e:
             log_outputs.append(f"Error loading model {model_name}: {e}")
-            # cleanup
             if os.path.exists(temp_extract_dir):
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
-                except OSError as e:
-                    log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
             yield "\n\n".join(log_outputs), "", None, 100
             return
     model = model_cache[model_name]
-    # Process files one by one and yield progress
     for idx, path in enumerate(all_audio_paths):
         basename = os.path.basename(path)
         try:
             log_outputs.append(f"Starting processing: {basename}")
             yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
-            # If file is .dct or other non-wav, convert
             try:
                 wav_path = convert_to_wav_if_needed(path)
                 if wav_path != path:
@@ -255,7 +299,7 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
                 yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
                 continue
-            # Transcribe using Whisper model
             try:
                 log_outputs.append(f"Transcribing: {basename}")
                 yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(10 + 80 * (processed / total_files))
@@ -263,102 +307,5 @@ def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_fi
                 result = model.transcribe(wav_path)
                 transcript = result.get("text", "")
-                # Save transcript to /tmp
                 base = os.path.splitext(basename)[0]
-                save_path = os.path.join('/tmp', f"{base}-transcript.txt")
-                with open(save_path, 'w', encoding='utf-8') as f:
-                    f.write(transcript)
-                log_outputs.append(f"File processed: {basename} -> {save_path}")
-                transcript_outputs_list.append(f"Transcript for {basename}:\n{transcript}")
-            except Exception as e:
-                log_outputs.append(f"Error processing {basename}: {e}")
-                transcript_outputs_list.append(f"Could not transcribe {basename} due to an error: {e}")
-            finally:
-                # remove temporary wav if we created one
-                if wav_path != path and os.path.exists(wav_path):
-                    try:
-                        os.unlink(wav_path)
-                    except Exception:
-                        pass
-            processed += 1
-            percent = int(5 + 90 * (processed / total_files))
-            yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
-        except Exception as e:
-            log_outputs.append(f"Unexpected error with {basename}: {e}")
-            transcript_outputs_list.append(f"Unexpected error with {basename}: {e}")
-            processed += 1
-            percent = int(5 + 90 * (processed / total_files))
-            yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
-    # After all files processed, possibly save merged Word file
-    combined_transcript_string = "\n\n---\n\n".join(transcript_outputs_list)
-    if merge_checkbox and combined_transcript_string.strip():
-        try:
-            word_filename = save_as_word(combined_transcript_string)
-            log_outputs.append(f"Merged transcript saved to: {word_filename}")
-            word_file_path = word_filename
-        except Exception as e:
-            log_outputs.append(f"Error saving merged transcript to Word file: {e}")
-    # cleanup extracted files
-    if os.path.exists(temp_extract_dir):
-        try:
-            shutil.rmtree(temp_extract_dir)
-            log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
-        except OSError as e:
-            log_outputs.append(f"Warning: Could not clean up temporary temporary directory {temp_extract_dir}: {e}")
-    # final yield at 100%
-    yield "\n\n".join(log_outputs), combined_transcript_string, word_file_path, 100
-# Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("## Whisper Transcription Tool (Multiple Files) — .dct support + progress")
-    with gr.Row():
-        model_dropdown = gr.Dropdown(
-            choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2", "large-v3"],
-            value="base",
-            label="Select Whisper Model"
-        )
-        advanced_checkbox = gr.Checkbox(label="Enable Advanced Options")
-        merge_checkbox = gr.Checkbox(label="Merge Transcripts into Single File", value=False)
-    with gr.Row():
-        zip_input = gr.File(file_count="single", type="filepath", label="Upload Zip File (Optional)")
-        zip_password_input = gr.Textbox(label="Zip File Password (Optional)", type="password")
-    audio_input = gr.File(file_count="multiple", type="filepath", label="Upload Audio Files (Optional)")
-    transcribe_btn = gr.Button("Start Transcription")
-    log_output = gr.Textbox(label="Log Output", lines=10)
-    transcript_output = gr.Textbox(label="Transcripts", lines=20)
-    word_file_output = gr.File(label="Download Merged Transcript (.docx)")
-    progress_num = gr.Number(value=0, label="Progress (%)")
-    def update_file_visibility(merge_checked):
-        return gr.update(visible=merge_checked)
-    merge_checkbox.change(
-        update_file_visibility,
-        inputs=[merge_checkbox],
-        outputs=[word_file_output],
-        api_name="update_file_visibility"
-    )
-    transcribe_btn.click(
-        transcribe_multiple,
-        inputs=[audio_input, model_dropdown, advanced_checkbox, merge_checkbox, zip_input, zip_password_input],
-        outputs=[log_output, transcript_output, word_file_output, progress_num]
-    )
-demo.launch()

+# app.py
 # Whisper Transcription Tool with .dct support and progress updates
+# Drop-in for Hugging Face Spaces (requires ffmpeg in environment)
 from docx import Document
 import os
 import whisper
 import gradio as gr
 import pyzipper
 import shutil
 import tempfile
+import subprocess
 from pydub import AudioSegment
+# Model cache to avoid reloading
 model_cache = {}
+def save_as_word(text, filename=None):
+    """Save text to a .docx and return the path."""
+    if filename is None:
+        filename = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
     document = Document()
     document.add_paragraph(text)
     document.save(filename)
     return filename
+def convert_to_wav_if_needed(input_path, diagnostics_keep=False):
     """
+    Robust conversion: try pydub auto first. If that fails,
+    attempt a grid of ffmpeg raw-format guesses. On success returns WAV path.
+    On total failure writes diagnostics into a temp dir and raises Exception
+    containing the diagnostics path.
     """
     lower = input_path.lower()
     if lower.endswith('.wav'):
         return input_path
+    # 1) Try pydub/AudioSegment auto
+    auto_err = ""
     try:
+        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+        tmp.close()
+        AudioSegment.from_file(input_path).export(tmp.name, format='wav')
+        return tmp.name
     except Exception as e_auto:
+        auto_err = str(e_auto)
         try:
+            os.unlink(tmp.name)
         except Exception:
             pass
+    # 2) Exhaustive ffmpeg guess grid
+    pcm_formats = ['s16le', 's32le', 's24le', 's8', 'u8', 's16be', 'pcm_s16le', 'pcm_u8', 'pcm_u16le']
+    mulaw_alaw = ['mulaw', 'alaw']
+    adpcm = ['adpcm_ima_wav', 'adpcm_ms']
+    other = ['gsm', 'g726', 'vorbis']  # extras; may fail but harmless
+    formats = pcm_formats + mulaw_alaw + adpcm + other
+    sample_rates = [8000, 11025, 12000, 16000, 22050, 32000, 44100, 48000]
+    channels = [1, 2]
+    diagnostics = []
+    diag_tmpdir = tempfile.mkdtemp(prefix='dct_diag_')
+    diag_log_path = os.path.join(diag_tmpdir, 'conversion_diagnostics.txt')
+    for fmt in formats:
+        for sr in sample_rates:
+            for ch in channels:
+                out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+                out_wav.close()
+                cmd = [
+                    'ffmpeg', '-hide_banner', '-loglevel', 'error', '-y',
+                    '-f', fmt, '-ar', str(sr), '-ac', str(ch),
+                    '-i', input_path, out_wav.name
+                ]
+                try:
+                    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
+                except Exception as e_run:
+                    diagnostics.append(f"RUN-FAIL fmt={fmt} sr={sr} ch={ch} err={e_run}")
+                    try:
+                        os.unlink(out_wav.name)
+                    except Exception:
+                        pass
+                    continue
+                rc = proc.returncode
+                stderr = proc.stderr.strip() if proc.stderr else ""
+                stdout = proc.stdout.strip() if proc.stdout else ""
+                diagnostics.append(f"ATTEMPT fmt={fmt} sr={sr} ch={ch} rc={rc}")
+                if stdout:
+                    diagnostics.append("STDOUT:")
+                    diagnostics.append(stdout)
+                if stderr:
+                    diagnostics.append("STDERR:")
+                    diagnostics.append(stderr)
+                diagnostics.append("-" * 60)
+                # success heuristic: exit 0 + output file present and > 200 bytes
+                try:
+                    if rc == 0 and os.path.exists(out_wav.name) and os.path.getsize(out_wav.name) > 200:
+                        # write compact diagnostics including success info
+                        with open(diag_log_path, 'w', encoding='utf-8') as df:
+                            df.write("pydub auto-error:\n")
+                            df.write(auto_err + "\n\n")
+                            df.write("Successful ffmpeg guess:\n")
+                            df.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
+                            df.write("Recent diagnostics (truncated):\n")
+                            df.write("\n".join(diagnostics[-1000:]))
+                        return out_wav.name
+                except Exception:
+                    pass
+                try:
+                    os.unlink(out_wav.name)
+                except Exception:
+                    pass
+    # try ffprobe if available for more info
+    try:
+        fp = subprocess.run(['ffprobe', '-v', 'error', '-show_format', '-show_streams', input_path],
+                            capture_output=True, text=True, timeout=15)
+        diagnostics.append("FFPROBE OUTPUT:")
+        diagnostics.append(fp.stdout.strip() or fp.stderr.strip())
+    except Exception as e:
+        diagnostics.append(f"ffprobe not available or failed: {e}")
+    # hex preview of first bytes
+    try:
+        with open(input_path, 'rb') as f:
+            head = f.read(256)
+            diagnostics.append("HEX PREVIEW (first 256 bytes):")
+            diagnostics.append(head.hex())
+    except Exception as e:
+        diagnostics.append(f"Could not read file head: {e}")
+    # write diagnostics
+    try:
+        with open(diag_log_path, 'w', encoding='utf-8') as df:
+            df.write("pydub auto-error:\n")
+            df.write(auto_err + "\n\n")
+            df.write("Full diagnostics from ffmpeg attempts:\n\n")
+            df.write("\n".join(diagnostics))
+    except Exception as e:
+        raise Exception(f"Conversion failed and diagnostics could not be written: {e}")
+    raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log_path}\nFirst diagnostics lines:\n" + "\n".join(diagnostics[:12]))
+def transcribe_multiple(audio_files, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
     """
+    Generator for Gradio to yield live progress.
+    Inputs:
+      audio_files: list or single filepath(s) (type='filepath' in Gradio)
+      model_name: whisper model name string
+      merge_checkbox: boolean to merge into docx
+      zip_file: optional path to zip file (type='filepath')
+      zip_password: optional password
+    Yields: (log_text, transcripts_text, word_file_path_or_None, percent_int)
     """
     log_outputs = []
     transcript_outputs_list = []
     word_file_path = None
     extracted_audio_paths = []
+    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
+    # initial yield so UI shows immediately
     yield "", "", None, 0
+    # cleanup old extract dir
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
             log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
+        except Exception as e:
             log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
+    # Handle zip file (zip_file may be a path string)
     if zip_file:
         log_outputs.append(f"Processing zip file: {zip_file}")
         yield "\n\n".join(log_outputs), "", None, 2
         try:
+            os.makedirs(temp_extract_dir, exist_ok=True)
             with pyzipper.ZipFile(zip_file, 'r') as zf:
                 if zip_password:
                     try:
                         zf.setpassword(zip_password.encode())
+                    except Exception:
                         log_outputs.append("Error: Incorrect password for the zip file.")
                         yield "\n\n".join(log_outputs), "", None, 100
                         return
                 audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
                 extracted_count = 0
                 for file_info in zf.infolist():
                     if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
                         try:
                             zf.extract(file_info, path=temp_extract_dir)
                             extracted_path = os.path.join(temp_extract_dir, file_info.filename)
                             extracted_path = os.path.normpath(extracted_path)
                             if os.path.exists(extracted_path):
                                 extracted_audio_paths.append(extracted_path)
                 if extracted_count == 0:
                     log_outputs.append("No supported audio files found in the zip archive.")
                     try:
                         shutil.rmtree(temp_extract_dir)
                         log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
+                except Exception as e:
                     log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
             yield "\n\n".join(log_outputs), "", None, 100
             return
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
+                except Exception as e2:
+                    log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e2}")
             yield "\n\n".join(log_outputs), "", None, 100
             return
+    # Build list of audio file paths
     all_audio_paths = []
+    if audio_files:
+        if isinstance(audio_files, (list, tuple)):
+            all_audio_paths.extend(audio_files)
         else:
+            all_audio_paths.append(audio_files)
     if extracted_audio_paths:
         all_audio_paths.extend(extracted_audio_paths)
     if not all_audio_paths:
         log_outputs.append("No audio files provided for transcription.")
         if os.path.exists(temp_extract_dir):
             try:
                 shutil.rmtree(temp_extract_dir)
                 log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
+            except Exception as e:
                 log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
         yield "\n\n".join(log_outputs), "", None, 100
         return
     total_files = len(all_audio_paths)
     processed = 0
+    # Load whisper model once
     if model_name not in model_cache:
         log_outputs.append(f"Loading model: {model_name}")
         yield "\n\n".join(log_outputs), "", None, 3
             model_cache[model_name] = whisper.load_model(model_name)
         except Exception as e:
             log_outputs.append(f"Error loading model {model_name}: {e}")
             if os.path.exists(temp_extract_dir):
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
+                except Exception:
+                    pass
             yield "\n\n".join(log_outputs), "", None, 100
             return
     model = model_cache[model_name]
     for idx, path in enumerate(all_audio_paths):
         basename = os.path.basename(path)
         try:
             log_outputs.append(f"Starting processing: {basename}")
             yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
+            # Convert to WAV if needed
             try:
                 wav_path = convert_to_wav_if_needed(path)
                 if wav_path != path:
                 yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
                 continue
+            # Transcribe with Whisper
             try:
                 log_outputs.append(f"Transcribing: {basename}")
                 yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(10 + 80 * (processed / total_files))
                 result = model.transcribe(wav_path)
                 transcript = result.get("text", "")
                 base = os.path.splitext(basename)[0]
+                save_path = os.path.join(tempfile.gett_