Spaces:

clementBE
/

Audio_transcrib_base

Paused

App Files Files Community

clementBE committed on Sep 29, 2025

Commit

a85e37e

verified ·

1 Parent(s): 13d1791

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -11

app.py CHANGED Viewed

@@ -51,7 +51,6 @@ def get_model_pipeline(model_name, pipeline_type, progress):
         progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
-        # Use GPU if available (device 0), otherwise fallback to CPU
         device = 0 if torch.cuda.is_available() else "cpu"
         if pipeline_type == "asr":
@@ -150,13 +149,25 @@ def analyze_audio_and_get_chunks(audio_file):
         error_msg = f"Error analyzing audio: {e}"
         return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
-def generate_summary(text, progress):
-    """Generates an abstractive summary using a pre-trained T5 model."""
     try:
         summarizer = get_model_pipeline("t5-small", "summarization", progress)
         summary = summarizer(
-            text,
             max_length=SUMMARY_MAX_LENGTH,
             min_length=SUMMARY_MIN_LENGTH,
             do_sample=False
@@ -165,6 +176,7 @@ def generate_summary(text, progress):
         return summary
     except Exception as e:
         return f"Error during summarization: {e}"
 @spaces.GPU
 def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
@@ -205,16 +217,19 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
     # 2. Define generation arguments (Language fix implemented here)
     generate_kwargs = {}
     if selected_language != "Auto-Detect":
         lang_code = LANGUAGE_MAP.get(selected_language, None)
         if lang_code:
-            generate_kwargs["language"] = lang_code # Forces the model to use this language
     # 3. Transcribe the segment
     progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
     raw_output = pipe(
         audio_segment_to_process,
         return_timestamps="word",
         generate_kwargs=generate_kwargs
     )
@@ -233,7 +248,8 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
     # 5. Generate Summary (if requested)
     summary_text = ""
     if summarize_output and transcribed_text:
-        summary_text = generate_summary(transcribed_text, progress)
     elif summarize_output and not transcribed_text:
         summary_text = "Transcription failed or was empty, cannot generate summary."
@@ -294,9 +310,9 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
             # LANGUAGE FIX: Selector to explicitly set the expected language
             language_selector = gr.Dropdown(
-                label="Select Expected Language (Overrides Auto-Detect)",
                 choices=LANGUAGE_CHOICES,
-                value="French", # Default to French as it was the problem language
                 interactive=True
             )
@@ -328,12 +344,11 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
     analyze_btn.click(
         fn=analyze_audio_and_get_chunks,
         inputs=[audio_input],
-        outputs=[chunk_selector, status_text] # status_text now includes the GPU warning
     )
     transcribe_btn.click(
         fn=transcribe_and_export,
-        # UPDATED: Added language_selector input
         inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
         outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
     )

         progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
         device = 0 if torch.cuda.is_available() else "cpu"
         if pipeline_type == "asr":
         error_msg = f"Error analyzing audio: {e}"
         return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
+# --- MODIFIED: generate_summary to force output language ---
+def generate_summary(text, target_language_code, progress):
+    """Generates an abstractive summary using a pre-trained T5 model, prompting for the target language."""
     try:
         summarizer = get_model_pipeline("t5-small", "summarization", progress)
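+        # NOTE(review): t5-small was trained mostly on English text (plus a few translation tasks),
+        # so it tends to answer in English. The language-specific prefix below is a best-effort
+        # hint toward the target language, not a guarantee; verify on real French/Spanish input.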
+        if target_language_code == "fr":
+            # Standard French summarization prompt format for T5-like models
+            prompt = f"résumer: {text}"
+        elif target_language_code == "es":
+            prompt = f"resumir: {text}"
+        else:
+            # Default English prompt (or for auto-detect)
+            prompt = f"summarize: {text}"
         summary = summarizer(
+            prompt,
             max_length=SUMMARY_MAX_LENGTH,
             min_length=SUMMARY_MIN_LENGTH,
             do_sample=False
         return summary
     except Exception as e:
         return f"Error during summarization: {e}"
+# -----------------------------------------------------------
 @spaces.GPU
 def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
     # 2. Define generation arguments (Language fix implemented here)
     generate_kwargs = {}
+    lang_code = None
     if selected_language != "Auto-Detect":
         lang_code = LANGUAGE_MAP.get(selected_language, None)
         if lang_code:
+            # Crucial for French fix: Pass the language code to Whisper
+            generate_kwargs["language"] = lang_code
     # 3. Transcribe the segment
     progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
     raw_output = pipe(
         audio_segment_to_process,
         return_timestamps="word",
+        # Pass the refined generate_kwargs
         generate_kwargs=generate_kwargs
     )
     # 5. Generate Summary (if requested)
     summary_text = ""
     if summarize_output and transcribed_text:
+        # Pass the language code to the summary function for explicit prompting
+        summary_text = generate_summary(transcribed_text, lang_code, progress)
     elif summarize_output and not transcribed_text:
         summary_text = "Transcription failed or was empty, cannot generate summary."
             # LANGUAGE FIX: Selector to explicitly set the expected language
             language_selector = gr.Dropdown(
+                label="Select Expected Language (Crucial for French/Non-English)",
                 choices=LANGUAGE_CHOICES,
+                value="French", # Default to French
                 interactive=True
             )
     analyze_btn.click(
         fn=analyze_audio_and_get_chunks,
         inputs=[audio_input],
+        outputs=[chunk_selector, status_text]
     )
     transcribe_btn.click(
         fn=transcribe_and_export,
         inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
         outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
     )