clementBE committed on
Commit
a85e37e
·
verified ·
1 Parent(s): 13d1791

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -51,7 +51,6 @@ def get_model_pipeline(model_name, pipeline_type, progress):
51
 
52
  progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
53
 
54
- # Use GPU if available (device 0), otherwise fallback to CPU
55
  device = 0 if torch.cuda.is_available() else "cpu"
56
 
57
  if pipeline_type == "asr":
@@ -150,13 +149,25 @@ def analyze_audio_and_get_chunks(audio_file):
150
  error_msg = f"Error analyzing audio: {e}"
151
  return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
152
 
153
- def generate_summary(text, progress):
154
- """Generates an abstractive summary using a pre-trained T5 model."""
 
155
  try:
156
  summarizer = get_model_pipeline("t5-small", "summarization", progress)
157
 
 
 
 
 
 
 
 
 
 
 
 
158
  summary = summarizer(
159
- text,
160
  max_length=SUMMARY_MAX_LENGTH,
161
  min_length=SUMMARY_MIN_LENGTH,
162
  do_sample=False
@@ -165,6 +176,7 @@ def generate_summary(text, progress):
165
  return summary
166
  except Exception as e:
167
  return f"Error during summarization: {e}"
 
168
 
169
  @spaces.GPU
170
  def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
@@ -205,16 +217,19 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
205
  # 2. Define generation arguments (Language fix implemented here)
206
  generate_kwargs = {}
207
 
 
208
  if selected_language != "Auto-Detect":
209
  lang_code = LANGUAGE_MAP.get(selected_language, None)
210
  if lang_code:
211
- generate_kwargs["language"] = lang_code # Forces the model to use this language
212
-
 
213
  # 3. Transcribe the segment
214
  progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
215
  raw_output = pipe(
216
  audio_segment_to_process,
217
  return_timestamps="word",
 
218
  generate_kwargs=generate_kwargs
219
  )
220
 
@@ -233,7 +248,8 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
233
  # 5. Generate Summary (if requested)
234
  summary_text = ""
235
  if summarize_output and transcribed_text:
236
- summary_text = generate_summary(transcribed_text, progress)
 
237
  elif summarize_output and not transcribed_text:
238
  summary_text = "Transcription failed or was empty, cannot generate summary."
239
 
@@ -294,9 +310,9 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
294
 
295
  # LANGUAGE FIX: Selector to explicitly set the expected language
296
  language_selector = gr.Dropdown(
297
- label="Select Expected Language (Overrides Auto-Detect)",
298
  choices=LANGUAGE_CHOICES,
299
- value="French", # Default to French as it was the problem language
300
  interactive=True
301
  )
302
 
@@ -328,12 +344,11 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
328
  analyze_btn.click(
329
  fn=analyze_audio_and_get_chunks,
330
  inputs=[audio_input],
331
- outputs=[chunk_selector, status_text] # status_text now includes the GPU warning
332
  )
333
 
334
  transcribe_btn.click(
335
  fn=transcribe_and_export,
336
- # UPDATED: Added language_selector input
337
  inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
338
  outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
339
  )
 
51
 
52
  progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
53
 
 
54
  device = 0 if torch.cuda.is_available() else "cpu"
55
 
56
  if pipeline_type == "asr":
 
149
  error_msg = f"Error analyzing audio: {e}"
150
  return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
151
 
152
+ # --- MODIFIED: generate_summary to force output language ---
153
+ def generate_summary(text, target_language_code, progress):
154
+ """Generates an abstractive summary using a pre-trained T5 model, prompting for the target language."""
155
  try:
156
  summarizer = get_model_pipeline("t5-small", "summarization", progress)
157
 
158
+ # T5-Small is multilingual but often defaults to English.
159
+ # We use a specific prompt based on the target language to force the output.
160
+ if target_language_code == "fr":
161
+ # Standard French summarization prompt format for T5-like models
162
+ prompt = f"résumer: {text}"
163
+ elif target_language_code == "es":
164
+ prompt = f"resumir: {text}"
165
+ else:
166
+ # Default English prompt (or for auto-detect)
167
+ prompt = f"summarize: {text}"
168
+
169
  summary = summarizer(
170
+ prompt,
171
  max_length=SUMMARY_MAX_LENGTH,
172
  min_length=SUMMARY_MIN_LENGTH,
173
  do_sample=False
 
176
  return summary
177
  except Exception as e:
178
  return f"Error during summarization: {e}"
179
+ # -----------------------------------------------------------
180
 
181
  @spaces.GPU
182
  def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
 
217
  # 2. Define generation arguments (Language fix implemented here)
218
  generate_kwargs = {}
219
 
220
+ lang_code = None
221
  if selected_language != "Auto-Detect":
222
  lang_code = LANGUAGE_MAP.get(selected_language, None)
223
  if lang_code:
224
+ # Crucial for French fix: Pass the language code to Whisper
225
+ generate_kwargs["language"] = lang_code
226
+
227
  # 3. Transcribe the segment
228
  progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
229
  raw_output = pipe(
230
  audio_segment_to_process,
231
  return_timestamps="word",
232
+ # Pass the refined generate_kwargs
233
  generate_kwargs=generate_kwargs
234
  )
235
 
 
248
  # 5. Generate Summary (if requested)
249
  summary_text = ""
250
  if summarize_output and transcribed_text:
251
+ # Pass the language code to the summary function for explicit prompting
252
+ summary_text = generate_summary(transcribed_text, lang_code, progress)
253
  elif summarize_output and not transcribed_text:
254
  summary_text = "Transcription failed or was empty, cannot generate summary."
255
 
 
310
 
311
  # LANGUAGE FIX: Selector to explicitly set the expected language
312
  language_selector = gr.Dropdown(
313
+ label="Select Expected Language (Crucial for French/Non-English)",
314
  choices=LANGUAGE_CHOICES,
315
+ value="French", # Default to French
316
  interactive=True
317
  )
318
 
 
344
  analyze_btn.click(
345
  fn=analyze_audio_and_get_chunks,
346
  inputs=[audio_input],
347
+ outputs=[chunk_selector, status_text]
348
  )
349
 
350
  transcribe_btn.click(
351
  fn=transcribe_and_export,
 
352
  inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
353
  outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
354
  )