Update app.py
Browse files
app.py
CHANGED
|
@@ -51,7 +51,6 @@ def get_model_pipeline(model_name, pipeline_type, progress):
|
|
| 51 |
|
| 52 |
progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
|
| 53 |
|
| 54 |
-
# Use GPU if available (device 0), otherwise fallback to CPU
|
| 55 |
device = 0 if torch.cuda.is_available() else "cpu"
|
| 56 |
|
| 57 |
if pipeline_type == "asr":
|
|
@@ -150,13 +149,25 @@ def analyze_audio_and_get_chunks(audio_file):
|
|
| 150 |
error_msg = f"Error analyzing audio: {e}"
|
| 151 |
return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
|
|
|
| 155 |
try:
|
| 156 |
summarizer = get_model_pipeline("t5-small", "summarization", progress)
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
summary = summarizer(
|
| 159 |
-
|
| 160 |
max_length=SUMMARY_MAX_LENGTH,
|
| 161 |
min_length=SUMMARY_MIN_LENGTH,
|
| 162 |
do_sample=False
|
|
@@ -165,6 +176,7 @@ def generate_summary(text, progress):
|
|
| 165 |
return summary
|
| 166 |
except Exception as e:
|
| 167 |
return f"Error during summarization: {e}"
|
|
|
|
| 168 |
|
| 169 |
@spaces.GPU
|
| 170 |
def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
|
|
@@ -205,16 +217,19 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
|
|
| 205 |
# 2. Define generation arguments (Language fix implemented here)
|
| 206 |
generate_kwargs = {}
|
| 207 |
|
|
|
|
| 208 |
if selected_language != "Auto-Detect":
|
| 209 |
lang_code = LANGUAGE_MAP.get(selected_language, None)
|
| 210 |
if lang_code:
|
| 211 |
-
|
| 212 |
-
|
|
|
|
| 213 |
# 3. Transcribe the segment
|
| 214 |
progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
|
| 215 |
raw_output = pipe(
|
| 216 |
audio_segment_to_process,
|
| 217 |
return_timestamps="word",
|
|
|
|
| 218 |
generate_kwargs=generate_kwargs
|
| 219 |
)
|
| 220 |
|
|
@@ -233,7 +248,8 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
|
|
| 233 |
# 5. Generate Summary (if requested)
|
| 234 |
summary_text = ""
|
| 235 |
if summarize_output and transcribed_text:
|
| 236 |
-
|
|
|
|
| 237 |
elif summarize_output and not transcribed_text:
|
| 238 |
summary_text = "Transcription failed or was empty, cannot generate summary."
|
| 239 |
|
|
@@ -294,9 +310,9 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
|
|
| 294 |
|
| 295 |
# LANGUAGE FIX: Selector to explicitly set the expected language
|
| 296 |
language_selector = gr.Dropdown(
|
| 297 |
-
label="Select Expected Language (
|
| 298 |
choices=LANGUAGE_CHOICES,
|
| 299 |
-
value="French", # Default to French
|
| 300 |
interactive=True
|
| 301 |
)
|
| 302 |
|
|
@@ -328,12 +344,11 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
|
|
| 328 |
analyze_btn.click(
|
| 329 |
fn=analyze_audio_and_get_chunks,
|
| 330 |
inputs=[audio_input],
|
| 331 |
-
outputs=[chunk_selector, status_text]
|
| 332 |
)
|
| 333 |
|
| 334 |
transcribe_btn.click(
|
| 335 |
fn=transcribe_and_export,
|
| 336 |
-
# UPDATED: Added language_selector input
|
| 337 |
inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
|
| 338 |
outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
|
| 339 |
)
|
|
|
|
| 51 |
|
| 52 |
progress(progress_start, desc="🚀 Initializing ZeroGPU instance..." if pipeline_type == "asr" else desc)
|
| 53 |
|
|
|
|
| 54 |
device = 0 if torch.cuda.is_available() else "cpu"
|
| 55 |
|
| 56 |
if pipeline_type == "asr":
|
|
|
|
| 149 |
error_msg = f"Error analyzing audio: {e}"
|
| 150 |
return gr.Dropdown(choices=["Full Audio"], value="Full Audio", interactive=False), error_msg
|
| 151 |
|
| 152 |
+
# --- MODIFIED: generate_summary to force output language ---
|
| 153 |
+
def generate_summary(text, target_language_code, progress):
|
| 154 |
+
"""Generates an abstractive summary using a pre-trained T5 model, prompting for the target language."""
|
| 155 |
try:
|
| 156 |
summarizer = get_model_pipeline("t5-small", "summarization", progress)
|
| 157 |
|
| 158 |
+
# t5-small was pre-trained mostly on English text, so its summaries tend to come out in English.
|
| 159 |
+
# We pick a task prefix based on the target language to try to steer the output language (best effort, not guaranteed).
|
| 160 |
+
if target_language_code == "fr":
|
| 161 |
+
# French task prefix. NOTE: T5's trained summarization prefix is "summarize: "; a translated prefix is only a hint - TODO verify it changes the output.
|
| 162 |
+
prompt = f"résumer: {text}"
|
| 163 |
+
elif target_language_code == "es":
|
| 164 |
+
prompt = f"resumir: {text}"
|
| 165 |
+
else:
|
| 166 |
+
# Default English prefix (also used for Auto-Detect and for any language other than French or Spanish)
|
| 167 |
+
prompt = f"summarize: {text}"
|
| 168 |
+
|
| 169 |
summary = summarizer(
|
| 170 |
+
prompt,
|
| 171 |
max_length=SUMMARY_MAX_LENGTH,
|
| 172 |
min_length=SUMMARY_MIN_LENGTH,
|
| 173 |
do_sample=False
|
|
|
|
| 176 |
return summary
|
| 177 |
except Exception as e:
|
| 178 |
return f"Error during summarization: {e}"
|
| 179 |
+
# -----------------------------------------------------------
|
| 180 |
|
| 181 |
@spaces.GPU
|
| 182 |
def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
|
|
|
|
| 217 |
# 2. Define generation arguments (Language fix implemented here)
|
| 218 |
generate_kwargs = {}
|
| 219 |
|
| 220 |
+
lang_code = None
|
| 221 |
if selected_language != "Auto-Detect":
|
| 222 |
lang_code = LANGUAGE_MAP.get(selected_language, None)
|
| 223 |
if lang_code:
|
| 224 |
+
# Crucial for French fix: Pass the language code to Whisper
|
| 225 |
+
generate_kwargs["language"] = lang_code
|
| 226 |
+
|
| 227 |
# 3. Transcribe the segment
|
| 228 |
progress(0.75, desc=f"🎤 Transcribing {chunk_choice}...")
|
| 229 |
raw_output = pipe(
|
| 230 |
audio_segment_to_process,
|
| 231 |
return_timestamps="word",
|
| 232 |
+
# Pass the refined generate_kwargs
|
| 233 |
generate_kwargs=generate_kwargs
|
| 234 |
)
|
| 235 |
|
|
|
|
| 248 |
# 5. Generate Summary (if requested)
|
| 249 |
summary_text = ""
|
| 250 |
if summarize_output and transcribed_text:
|
| 251 |
+
# Pass the language code to the summary function for explicit prompting
|
| 252 |
+
summary_text = generate_summary(transcribed_text, lang_code, progress)
|
| 253 |
elif summarize_output and not transcribed_text:
|
| 254 |
summary_text = "Transcription failed or was empty, cannot generate summary."
|
| 255 |
|
|
|
|
| 310 |
|
| 311 |
# LANGUAGE FIX: Selector to explicitly set the expected language
|
| 312 |
language_selector = gr.Dropdown(
|
| 313 |
+
label="Select Expected Language (Crucial for French/Non-English)",
|
| 314 |
choices=LANGUAGE_CHOICES,
|
| 315 |
+
value="French", # Default to French
|
| 316 |
interactive=True
|
| 317 |
)
|
| 318 |
|
|
|
|
| 344 |
analyze_btn.click(
|
| 345 |
fn=analyze_audio_and_get_chunks,
|
| 346 |
inputs=[audio_input],
|
| 347 |
+
outputs=[chunk_selector, status_text]
|
| 348 |
)
|
| 349 |
|
| 350 |
transcribe_btn.click(
|
| 351 |
fn=transcribe_and_export,
|
|
|
|
| 352 |
inputs=[audio_input, model_selector, chunk_selector, language_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summarize_checkbox],
|
| 353 |
outputs=[transcription_output, summary_output, downloadable_files_output, audio_input, status_text]
|
| 354 |
)
|