Spaces:

clementBE
/

Audio_transcrib_base

Paused

App Files Community

clementBE committed on Sep 30, 2025

Commit

a602b66

verified ·

1 Parent(s): ceaed29

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -9

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ MODEL_SIZES = {
     "Tiny (Fastest)": "openai/whisper-tiny",
     "Base (Faster)": "openai/whisper-base",
     "Small (Balanced)": "openai/whisper-small",
     "Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
 }
@@ -36,10 +37,9 @@ LANGUAGE_MAP = {
 }
 LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
-# --- CRUCIAL FIX: Corrected Initial Prompt key ---
-# Changed from 'FRENCH_INITIAL_PROMPT' to 'FRENCH_TRANSCRIPTION_PROMPT' for clarity,
-# and will be used as the value for the "initial_prompt" key.
-FRENCH_TRANSCRIPTION_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
 def get_model_pipeline(model_name, pipeline_type, progress):
     """
@@ -158,10 +158,12 @@ def generate_summary(text, target_language_code, progress):
         summarizer = get_model_pipeline("t5-small", "summarization", progress)
         if target_language_code == "fr":
             prompt = f"résumer: {text}"
         elif target_language_code == "es":
             prompt = f"resumir: {text}"
         else:
             prompt = f"summarize: {text}"
         summary = summarizer(
@@ -178,8 +180,7 @@ def generate_summary(text, target_language_code, progress):
 @spaces.GPU
 def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
     """
-    Main function to transcribe audio and export. Uses selected_language and
-    'initial_prompt' to fix language adherence, particularly for large models.
     """
     if audio_file is None:
         return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
@@ -220,10 +221,10 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
         if lang_code:
             generate_kwargs["language"] = lang_code
-            # --- CRUCIAL FIX for large models: Use 'initial_prompt' ---
             if lang_code == "fr":
-                # Changed from 'prompt' to 'initial_prompt' to resolve the ValueError
-                generate_kwargs["initial_prompt"] = FRENCH_TRANSCRIPTION_PROMPT
             # -----------------------------------------------------------
     # 3. Transcribe the segment
@@ -249,6 +250,7 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
     # 5. Generate Summary (if requested)
     summary_text = ""
     if summarize_output and transcribed_text:
         summary_text = generate_summary(transcribed_text, lang_code, progress)
     elif summarize_output and not transcribed_text:
         summary_text = "Transcription failed or was empty, cannot generate summary."
@@ -308,6 +310,7 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
             )
             language_selector = gr.Dropdown(
                 label="Select Expected Language (Uses initial prompt for better French accuracy)",
                 choices=LANGUAGE_CHOICES,
                 value="French",

     "Tiny (Fastest)": "openai/whisper-tiny",
     "Base (Faster)": "openai/whisper-base",
     "Small (Balanced)": "openai/whisper-small",
+    # FIX: Remove redundant entry and keep only the general-purpose one
     "Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
 }
 }
 LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
+# --- CRUCIAL FIX: Initial Prompt for Transcription ---
+# Adding an initial prompt greatly increases adherence to the target language.
+FRENCH_INITIAL_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
 def get_model_pipeline(model_name, pipeline_type, progress):
     """
         summarizer = get_model_pipeline("t5-small", "summarization", progress)
         if target_language_code == "fr":
+            # French summarization prompt
             prompt = f"résumer: {text}"
         elif target_language_code == "es":
             prompt = f"resumir: {text}"
         else:
+            # Default English prompt (or for auto-detect/other languages)
             prompt = f"summarize: {text}"
         summary = summarizer(
 @spaces.GPU
 def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
     """
+    Main function to transcribe audio and export. Includes initial_prompt fix for language adherence.
     """
     if audio_file is None:
         return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
         if lang_code:
             generate_kwargs["language"] = lang_code
+            # --- CRUCIAL FIX for large models (Distil-Large-v3, etc.) ---
             if lang_code == "fr":
+                # Add the initial prompt to reinforce French transcription
+                generate_kwargs["prompt"] = FRENCH_INITIAL_PROMPT
             # -----------------------------------------------------------
     # 3. Transcribe the segment
     # 5. Generate Summary (if requested)
     summary_text = ""
     if summarize_output and transcribed_text:
+        # Pass the language code to the summary function for explicit prompting
         summary_text = generate_summary(transcribed_text, lang_code, progress)
     elif summarize_output and not transcribed_text:
         summary_text = "Transcription failed or was empty, cannot generate summary."
             )
             language_selector = gr.Dropdown(
+                # Updated label to reflect the fix
                 label="Select Expected Language (Uses initial prompt for better French accuracy)",
                 choices=LANGUAGE_CHOICES,
                 value="French",