Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks committed on Nov 14, 2025

Commit

cbbc496

verified ·

1 Parent(s): a2d60a6

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -115

app.py CHANGED Viewed

@@ -1,15 +1,20 @@
-# @title Default title text
 from docx import Document
 import os
 import whisper
 import gradio as gr
 import pyzipper
 import glob
-import shutil # Import shutil for removing directories
-# Load default model
 model_cache = {}
 def save_as_word(text, filename="merged_transcripts.docx"):
     """Saves the given text as a Word document."""
     document = Document()
@@ -18,155 +23,231 @@ def save_as_word(text, filename="merged_transcripts.docx"):
     return filename
-def transcribe_multiple(audio_list, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
     log_outputs = []
     transcript_outputs_list = []
     word_file_path = None
     extracted_audio_paths = []
     temp_extract_dir = "/tmp/extracted_audio"
-    # Clean up previous extractions
     if os.path.exists(temp_extract_dir):
         try:
-            shutil.rmtree(temp_extract_dir) # Use shutil.rmtree for removing the directory and its contents
             log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
         except OSError as e:
             log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
     if zip_file:
-        log_outputs.append(f"Processing zip file: {zip_file.name}")
         try:
-            with pyzipper.ZipFile(zip_file.name, 'r') as zf:
                 if zip_password:
                     try:
                         zf.setpassword(zip_password.encode())
                     except RuntimeError:
-                         log_outputs.append("Error: Incorrect password for the zip file.")
-                         # Return immediately on password error
-                         return "\n\n".join(log_outputs), "", None
-                # Create the extraction directory if it doesn't exist
                 os.makedirs(temp_extract_dir, exist_ok=True)
-                audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct'] # Added .dct extension
                 extracted_count = 0
                 for file_info in zf.infolist():
                     if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
-                         try:
-                             extracted_path = zf.extract(file_info, path=temp_extract_dir)
-                             extracted_audio_paths.append(extracted_path)
-                             log_outputs.append(f"Extracted: {file_info.filename}")
-                             extracted_count += 1
-                         except Exception as e:
-                             log_outputs.append(f"Error extracting {file_info.filename}: {e}")
                 if extracted_count == 0:
-                     log_outputs.append("No supported audio files found in the zip archive.")
-                     # If zip was provided but no audio found, return here
-                     if not audio_list:
-                          # Clean up the newly created empty directory before returning
-                          if os.path.exists(temp_extract_dir):
-                              try:
-                                  os.rmdir(temp_extract_dir)
-                                  log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
-                              except OSError as e:
-                                  log_outputs.append(f"Warning: Could not remove empty temporary directory {temp_extract_dir}: {e}")
-                          return "\n\n".join(log_outputs), "", None
         except pyzipper.BadZipFile:
-            log_outputs.append(f"Error: Invalid zip file format.")
-            # Clean up any partial extractions before returning
             if os.path.exists(temp_extract_dir):
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
                 except OSError as e:
                     log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
-            return "\n\n".join(log_outputs), "", None
-        except FileNotFoundError:
-             log_outputs.append(f"Error: Zip file not found.")
-             return "\n\n".join(log_outputs), "", None
         except Exception as e:
-             log_outputs.append(f"An unexpected error occurred during zip processing: {e}")
-             # Clean up any partial extractions before returning
-             if os.path.exists(temp_extract_dir):
-                 try:
-                     shutil.rmtree(temp_extract_dir)
-                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
-                 except OSError as e:
-                     log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
-             return "\n\n".join(log_outputs), "", None
-    all_audio_files = []
-    if audio_list:
-        all_audio_files.extend(audio_list)
     if extracted_audio_paths:
-        for path in extracted_audio_paths:
-            if os.path.exists(path):
-                 all_audio_files.append(gr.File(path))
-    if not all_audio_files:
-         log_outputs.append("No audio files provided for transcription.")
-         # Clean up the temporary directory if it was created but no audio was found
-         if os.path.exists(temp_extract_dir):
-             try:
-                 shutil.rmtree(temp_extract_dir)
-                 log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
-             except OSError as e:
-                 log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
-         return "\n\n".join(log_outputs), "", None
-    for audio in all_audio_files:
-        # Load model (cache for reuse)
-        if model_name not in model_cache:
-            log_outputs.append(f"Loading model: {model_name}")
             try:
-                 model_cache[model_name] = whisper.load_model(model_name)
             except Exception as e:
-                 log_outputs.append(f"Error loading model {model_name}: {e}")
-                 # If model loading fails for the first file, stop processing
-                 # Clean up extracted files before returning
-                 if os.path.exists(temp_extract_dir):
-                     try:
-                         shutil.rmtree(temp_extract_dir)
-                         log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
-                     except OSError as e:
-                         log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
-                 return "\n\n".join(log_outputs), "", None
-        model = model_cache[model_name]
-        # Transcribe
-        try:
-            log_outputs.append(f"Transcribing: {os.path.basename(audio.name)}")
-            result = model.transcribe(audio.name)
-            transcript = result["text"]
-            # Save transcript
-            base = os.path.splitext(os.path.basename(audio.name))[0]
-            # Save transcripts in the /tmp directory
-            save_path = os.path.join("/tmp", f"{base}-transcript.txt")
-            with open(save_path, "w", encoding="utf-8") as f:
-                f.write(transcript)
-            log = f"File: {os.path.basename(audio.name)}\nSaved to: {save_path}"
-            log_outputs.append(log)
-            transcript_outputs_list.append(f"Transcript for {os.path.basename(audio.name)}:\n{transcript}")
         except Exception as e:
-            log_outputs.append(f"Error processing {os.path.basename(audio.name)}: {e}")
-            transcript_outputs_list.append(f"Could not transcribe {os.path.basename(audio.name)} due to an error.")
     combined_transcript_string = "\n\n---\n\n".join(transcript_outputs_list)
-    if merge_checkbox and combined_transcript_string.strip(): # Only create word file if merging and there is content
         try:
             word_filename = save_as_word(combined_transcript_string)
             log_outputs.append(f"Merged transcript saved to: {word_filename}")
@@ -174,22 +255,21 @@ def transcribe_multiple(audio_list, model_name, advanced, merge_checkbox, zip_fi
         except Exception as e:
             log_outputs.append(f"Error saving merged transcript to Word file: {e}")
-    # Clean up extracted files after processing
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
             log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
         except OSError as e:
-             log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_extract_dir}: {e}")
-    return "\n\n".join(log_outputs), combined_transcript_string, word_file_path
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## Whisper Transcription Tool (Multiple Files)")
     with gr.Row():
         model_dropdown = gr.Dropdown(
@@ -210,10 +290,11 @@ with gr.Blocks() as demo:
     log_output = gr.Textbox(label="Log Output", lines=10)
     transcript_output = gr.Textbox(label="Transcripts", lines=20)
-    word_file_output = gr.File(label="Download Merged Transcript (.docx)", visible=False)
     def update_file_visibility(merge_checked):
-        return gr.File(visible=merge_checked)
     merge_checkbox.change(
         update_file_visibility,
@@ -222,11 +303,11 @@ with gr.Blocks() as demo:
         api_name="update_file_visibility"
     )
     transcribe_btn.click(
         transcribe_multiple,
         inputs=[audio_input, model_dropdown, advanced_checkbox, merge_checkbox, zip_input, zip_password_input],
-        outputs=[log_output, transcript_output, word_file_output]
     )
-demo.launch()

+# Whisper Transcription Tool with .dct support and progress updates
+# Drop-in replacement for your app.py. Paste into your Hugging Face Space.
 from docx import Document
 import os
 import whisper
 import gradio as gr
 import pyzipper
 import glob
+import shutil
+import tempfile
+from pydub import AudioSegment
+# Load default model cache
 model_cache = {}
 def save_as_word(text, filename="merged_transcripts.docx"):
     """Saves the given text as a Word document."""
     document = Document()
     return filename
+def convert_to_wav_if_needed(input_path):
+    """
+    If the input file is not WAV, try to convert it to WAV using pydub/ffmpeg.
+    Returns path to WAV file (may be same as input if already WAV).
+    """
+    lower = input_path.lower()
+    if lower.endswith('.wav'):
+        return input_path
+    # create a temp wav file
+    tmp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    tmp_wav.close()
+    try:
+        # pydub will use ffmpeg under the hood
+        AudioSegment.from_file(input_path).export(tmp_wav.name, format='wav')
+        return tmp_wav.name
+    except Exception as e:
+        # cleanup if conversion failed
+        try:
+            os.unlink(tmp_wav.name)
+        except Exception:
+            pass
+        raise e
+def transcribe_multiple(file_paths, model_name, advanced, merge_checkbox, zip_file=None, zip_password=None):
+    """
+    Generator function for Gradio that yields progress updates.
+    Outputs: (log_text, transcripts_text, word_file_path_or_None, percent_int)
+    """
+    # initial state
     log_outputs = []
     transcript_outputs_list = []
     word_file_path = None
     extracted_audio_paths = []
     temp_extract_dir = "/tmp/extracted_audio"
+    # yield initial empty state (so UI shows up immediately)
+    yield "", "", None, 0
+    # cleanup any previous temp dir
     if os.path.exists(temp_extract_dir):
         try:
+            shutil.rmtree(temp_extract_dir)
             log_outputs.append(f"Cleaned up previous temporary directory: {temp_extract_dir}")
         except OSError as e:
             log_outputs.append(f"Warning: Could not clean up previous temporary directory {temp_extract_dir}: {e}")
+    # If a zip is provided, extract supported audio files
     if zip_file:
+        log_outputs.append(f"Processing zip file: {zip_file}")
+        yield "\n\n".join(log_outputs), "", None, 2
         try:
+            with pyzipper.ZipFile(zip_file, 'r') as zf:
                 if zip_password:
                     try:
                         zf.setpassword(zip_password.encode())
                     except RuntimeError:
+                        log_outputs.append("Error: Incorrect password for the zip file.")
+                        yield "\n\n".join(log_outputs), "", None, 100
+                        return
                 os.makedirs(temp_extract_dir, exist_ok=True)
+                audio_extensions = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
                 extracted_count = 0
                 for file_info in zf.infolist():
                     if not file_info.is_dir() and os.path.splitext(file_info.filename)[1].lower() in audio_extensions:
+                        try:
+                            # extract returns path relative to extract dir; build absolute path
+                            zf.extract(file_info, path=temp_extract_dir)
+                            extracted_path = os.path.join(temp_extract_dir, file_info.filename)
+                            # Ensure parent dirs exist (zip could contain folders)
+                            extracted_path = os.path.normpath(extracted_path)
+                            if os.path.exists(extracted_path):
+                                extracted_audio_paths.append(extracted_path)
+                                log_outputs.append(f"Extracted: {file_info.filename}")
+                                extracted_count += 1
+                        except Exception as e:
+                            log_outputs.append(f"Error extracting {file_info.filename}: {e}")
                 if extracted_count == 0:
+                    log_outputs.append("No supported audio files found in the zip archive.")
+                    # cleanup empty dir
+                    try:
+                        shutil.rmtree(temp_extract_dir)
+                        log_outputs.append(f"Removed empty temporary directory: {temp_extract_dir}")
+                    except Exception as e:
+                        log_outputs.append(f"Warning: Could not remove temporary directory {temp_extract_dir}: {e}")
+                    yield "\n\n".join(log_outputs), "", None, 100
+                    return
         except pyzipper.BadZipFile:
+            log_outputs.append("Error: Invalid zip file format.")
             if os.path.exists(temp_extract_dir):
                 try:
                     shutil.rmtree(temp_extract_dir)
                     log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
                 except OSError as e:
                     log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
+            yield "\n\n".join(log_outputs), "", None, 100
+            return
         except Exception as e:
+            log_outputs.append(f"An unexpected error occurred during zip processing: {e}")
+            if os.path.exists(temp_extract_dir):
+                try:
+                    shutil.rmtree(temp_extract_dir)
+                    log_outputs.append(f"Cleaned up partial temporary directory: {temp_extract_dir}")
+                except OSError as e:
+                    log_outputs.append(f"Warning: Could not clean up partial temporary directory {temp_extract_dir}: {e}")
+            yield "\n\n".join(log_outputs), "", None, 100
+            return
+    # Build list of input file paths (strings)
+    all_audio_paths = []
+    if file_paths:
+        # file_paths from Gradio with type="filepath" come as list of paths
+        if isinstance(file_paths, (list, tuple)):
+            all_audio_paths.extend(file_paths)
+        else:
+            all_audio_paths.append(file_paths)
     if extracted_audio_paths:
+        all_audio_paths.extend(extracted_audio_paths)
+    if not all_audio_paths:
+        log_outputs.append("No audio files provided for transcription.")
+        # cleanup
+        if os.path.exists(temp_extract_dir):
+            try:
+                shutil.rmtree(temp_extract_dir)
+                log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
+            except OSError as e:
+                log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
+        yield "\n\n".join(log_outputs), "", None, 100
+        return
+    total_files = len(all_audio_paths)
+    processed = 0
+    # Load model once (cache)
+    if model_name not in model_cache:
+        log_outputs.append(f"Loading model: {model_name}")
+        yield "\n\n".join(log_outputs), "", None, 3
+        try:
+            model_cache[model_name] = whisper.load_model(model_name)
+        except Exception as e:
+            log_outputs.append(f"Error loading model {model_name}: {e}")
+            # cleanup
+            if os.path.exists(temp_extract_dir):
+                try:
+                    shutil.rmtree(temp_extract_dir)
+                    log_outputs.append(f"Cleaned up temporary directory after model loading error: {temp_extract_dir}")
+                except OSError as e:
+                    log_outputs.append(f"Warning: Could not clean up temporary directory {temp_extract_dir}: {e}")
+            yield "\n\n".join(log_outputs), "", None, 100
+            return
+    model = model_cache[model_name]
+    # Process files one by one and yield progress
+    for idx, path in enumerate(all_audio_paths):
+        basename = os.path.basename(path)
+        try:
+            log_outputs.append(f"Starting processing: {basename}")
+            yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
+            # If file is .dct or other non-wav, convert
             try:
+                wav_path = convert_to_wav_if_needed(path)
+                if wav_path != path:
+                    log_outputs.append(f"Converted {basename} -> WAV")
+                else:
+                    log_outputs.append(f"Using WAV file: {basename}")
+                yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
             except Exception as e:
+                log_outputs.append(f"Conversion failed for {basename}: {e}")
+                transcript_outputs_list.append(f"Could not convert {basename}: {e}")
+                processed += 1
+                yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(5 + 90 * (processed / total_files))
+                continue
+            # Transcribe using Whisper model
+            try:
+                log_outputs.append(f"Transcribing: {basename}")
+                yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, int(10 + 80 * (processed / total_files))
+                result = model.transcribe(wav_path)
+                transcript = result.get("text", "")
+                # Save transcript to /tmp
+                base = os.path.splitext(basename)[0]
+                save_path = os.path.join('/tmp', f"{base}-transcript.txt")
+                with open(save_path, 'w', encoding='utf-8') as f:
+                    f.write(transcript)
+                log_outputs.append(f"File processed: {basename} -> {save_path}")
+                transcript_outputs_list.append(f"Transcript for {basename}:\n{transcript}")
+            except Exception as e:
+                log_outputs.append(f"Error processing {basename}: {e}")
+                transcript_outputs_list.append(f"Could not transcribe {basename} due to an error: {e}")
+            finally:
+                # remove temporary wav if we created one
+                if wav_path != path and os.path.exists(wav_path):
+                    try:
+                        os.unlink(wav_path)
+                    except Exception:
+                        pass
+            processed += 1
+            percent = int(5 + 90 * (processed / total_files))
+            yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
         except Exception as e:
+            log_outputs.append(f"Unexpected error with {basename}: {e}")
+            transcript_outputs_list.append(f"Unexpected error with {basename}: {e}")
+            processed += 1
+            percent = int(5 + 90 * (processed / total_files))
+            yield "\n\n".join(log_outputs), "\n\n".join(transcript_outputs_list), None, percent
+    # After all files processed, possibly save merged Word file
     combined_transcript_string = "\n\n---\n\n".join(transcript_outputs_list)
+    if merge_checkbox and combined_transcript_string.strip():
         try:
             word_filename = save_as_word(combined_transcript_string)
             log_outputs.append(f"Merged transcript saved to: {word_filename}")
         except Exception as e:
             log_outputs.append(f"Error saving merged transcript to Word file: {e}")
+    # cleanup extracted files
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
             log_outputs.append(f"Cleaned up temporary directory: {temp_extract_dir}")
         except OSError as e:
+            log_outputs.append(f"Warning: Could not clean up temporary temporary directory {temp_extract_dir}: {e}")
+    # final yield at 100%
+    yield "\n\n".join(log_outputs), combined_transcript_string, word_file_path, 100
 # Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("## Whisper Transcription Tool (Multiple Files) — .dct support + progress")
     with gr.Row():
         model_dropdown = gr.Dropdown(
     log_output = gr.Textbox(label="Log Output", lines=10)
     transcript_output = gr.Textbox(label="Transcripts", lines=20)
+    word_file_output = gr.File(label="Download Merged Transcript (.docx)")
+    progress_num = gr.Number(value=0, label="Progress (%)")
     def update_file_visibility(merge_checked):
+        return gr.update(visible=merge_checked)
     merge_checkbox.change(
         update_file_visibility,
         api_name="update_file_visibility"
     )
     transcribe_btn.click(
         transcribe_multiple,
         inputs=[audio_input, model_dropdown, advanced_checkbox, merge_checkbox, zip_input, zip_password_input],
+        outputs=[log_output, transcript_output, word_file_output, progress_num]
     )
+demo.launch()