clementBE committed on
Commit
a602b66
·
verified ·
1 Parent(s): ceaed29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -13,6 +13,7 @@ MODEL_SIZES = {
13
  "Tiny (Fastest)": "openai/whisper-tiny",
14
  "Base (Faster)": "openai/whisper-base",
15
  "Small (Balanced)": "openai/whisper-small",
 
16
  "Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
17
  }
18
 
@@ -36,10 +37,9 @@ LANGUAGE_MAP = {
36
  }
37
  LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
38
 
39
- # --- CRUCIAL FIX: Corrected Initial Prompt key ---
40
- # Changed from 'FRENCH_INITIAL_PROMPT' to 'FRENCH_TRANSCRIPTION_PROMPT' for clarity,
41
- # and will be used as the value for the "initial_prompt" key.
42
- FRENCH_TRANSCRIPTION_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
43
 
44
  def get_model_pipeline(model_name, pipeline_type, progress):
45
  """
@@ -158,10 +158,12 @@ def generate_summary(text, target_language_code, progress):
158
  summarizer = get_model_pipeline("t5-small", "summarization", progress)
159
 
160
  if target_language_code == "fr":
 
161
  prompt = f"résumer: {text}"
162
  elif target_language_code == "es":
163
  prompt = f"resumir: {text}"
164
  else:
 
165
  prompt = f"summarize: {text}"
166
 
167
  summary = summarizer(
@@ -178,8 +180,7 @@ def generate_summary(text, target_language_code, progress):
178
  @spaces.GPU
179
  def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
180
  """
181
- Main function to transcribe audio and export. Uses selected_language and
182
- 'initial_prompt' to fix language adherence, particularly for large models.
183
  """
184
  if audio_file is None:
185
  return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
@@ -220,10 +221,10 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
220
  if lang_code:
221
  generate_kwargs["language"] = lang_code
222
 
223
- # --- CRUCIAL FIX for large models: Use 'initial_prompt' ---
224
  if lang_code == "fr":
225
- # Changed from 'prompt' to 'initial_prompt' to resolve the ValueError
226
- generate_kwargs["initial_prompt"] = FRENCH_TRANSCRIPTION_PROMPT
227
  # -----------------------------------------------------------
228
 
229
  # 3. Transcribe the segment
@@ -249,6 +250,7 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
249
  # 5. Generate Summary (if requested)
250
  summary_text = ""
251
  if summarize_output and transcribed_text:
 
252
  summary_text = generate_summary(transcribed_text, lang_code, progress)
253
  elif summarize_output and not transcribed_text:
254
  summary_text = "Transcription failed or was empty, cannot generate summary."
@@ -308,6 +310,7 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
308
  )
309
 
310
  language_selector = gr.Dropdown(
 
311
  label="Select Expected Language (Uses initial prompt for better French accuracy)",
312
  choices=LANGUAGE_CHOICES,
313
  value="French",
 
13
  "Tiny (Fastest)": "openai/whisper-tiny",
14
  "Base (Faster)": "openai/whisper-base",
15
  "Small (Balanced)": "openai/whisper-small",
16
+ # FIX: Remove redundant entry and keep only the general-purpose one
17
  "Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
18
  }
19
 
 
37
  }
38
  LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
39
 
40
+ # --- CRUCIAL FIX: Initial Prompt for Transcription ---
41
+ # Adding an initial prompt greatly increases adherence to the target language.
42
+ FRENCH_INITIAL_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
 
43
 
44
  def get_model_pipeline(model_name, pipeline_type, progress):
45
  """
 
158
  summarizer = get_model_pipeline("t5-small", "summarization", progress)
159
 
160
  if target_language_code == "fr":
161
+ # French summarization prompt
162
  prompt = f"résumer: {text}"
163
  elif target_language_code == "es":
164
  prompt = f"resumir: {text}"
165
  else:
166
+ # Default English prompt (or for auto-detect/other languages)
167
  prompt = f"summarize: {text}"
168
 
169
  summary = summarizer(
 
180
  @spaces.GPU
181
  def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
182
  """
183
+ Main function to transcribe audio and export. Includes initial_prompt fix for language adherence.
 
184
  """
185
  if audio_file is None:
186
  return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
 
221
  if lang_code:
222
  generate_kwargs["language"] = lang_code
223
 
224
+ # --- CRUCIAL FIX for large models (Distil-Large-v3, etc.) ---
225
  if lang_code == "fr":
226
+ # Add the initial prompt to reinforce French transcription
227
+ generate_kwargs["prompt"] = FRENCH_INITIAL_PROMPT
228
  # -----------------------------------------------------------
229
 
230
  # 3. Transcribe the segment
 
250
  # 5. Generate Summary (if requested)
251
  summary_text = ""
252
  if summarize_output and transcribed_text:
253
+ # Pass the language code to the summary function for explicit prompting
254
  summary_text = generate_summary(transcribed_text, lang_code, progress)
255
  elif summarize_output and not transcribed_text:
256
  summary_text = "Transcription failed or was empty, cannot generate summary."
 
310
  )
311
 
312
  language_selector = gr.Dropdown(
313
+ # Updated label to reflect the fix
314
  label="Select Expected Language (Uses initial prompt for better French accuracy)",
315
  choices=LANGUAGE_CHOICES,
316
  value="French",