Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ MODEL_SIZES = {
|
|
| 13 |
"Tiny (Fastest)": "openai/whisper-tiny",
|
| 14 |
"Base (Faster)": "openai/whisper-base",
|
| 15 |
"Small (Balanced)": "openai/whisper-small",
|
|
|
|
| 16 |
"Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
|
| 17 |
}
|
| 18 |
|
|
@@ -36,10 +37,9 @@ LANGUAGE_MAP = {
|
|
| 36 |
}
|
| 37 |
LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
|
| 38 |
|
| 39 |
-
# --- CRUCIAL FIX:
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
FRENCH_TRANSCRIPTION_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
|
| 43 |
|
| 44 |
def get_model_pipeline(model_name, pipeline_type, progress):
|
| 45 |
"""
|
|
@@ -158,10 +158,12 @@ def generate_summary(text, target_language_code, progress):
|
|
| 158 |
summarizer = get_model_pipeline("t5-small", "summarization", progress)
|
| 159 |
|
| 160 |
if target_language_code == "fr":
|
|
|
|
| 161 |
prompt = f"résumer: {text}"
|
| 162 |
elif target_language_code == "es":
|
| 163 |
prompt = f"resumir: {text}"
|
| 164 |
else:
|
|
|
|
| 165 |
prompt = f"summarize: {text}"
|
| 166 |
|
| 167 |
summary = summarizer(
|
|
@@ -178,8 +180,7 @@ def generate_summary(text, target_language_code, progress):
|
|
| 178 |
@spaces.GPU
|
| 179 |
def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
|
| 180 |
"""
|
| 181 |
-
Main function to transcribe audio and export.
|
| 182 |
-
'initial_prompt' to fix language adherence, particularly for large models.
|
| 183 |
"""
|
| 184 |
if audio_file is None:
|
| 185 |
return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
|
|
@@ -220,10 +221,10 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
|
|
| 220 |
if lang_code:
|
| 221 |
generate_kwargs["language"] = lang_code
|
| 222 |
|
| 223 |
-
# --- CRUCIAL FIX for large models
|
| 224 |
if lang_code == "fr":
|
| 225 |
-
#
|
| 226 |
-
generate_kwargs["
|
| 227 |
# -----------------------------------------------------------
|
| 228 |
|
| 229 |
# 3. Transcribe the segment
|
|
@@ -249,6 +250,7 @@ def transcribe_and_export(audio_file, model_size, chunk_choice, selected_languag
|
|
| 249 |
# 5. Generate Summary (if requested)
|
| 250 |
summary_text = ""
|
| 251 |
if summarize_output and transcribed_text:
|
|
|
|
| 252 |
summary_text = generate_summary(transcribed_text, lang_code, progress)
|
| 253 |
elif summarize_output and not transcribed_text:
|
| 254 |
summary_text = "Transcription failed or was empty, cannot generate summary."
|
|
@@ -308,6 +310,7 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription & Summarization") as demo:
|
|
| 308 |
)
|
| 309 |
|
| 310 |
language_selector = gr.Dropdown(
|
|
|
|
| 311 |
label="Select Expected Language (Uses initial prompt for better French accuracy)",
|
| 312 |
choices=LANGUAGE_CHOICES,
|
| 313 |
value="French",
|
|
|
|
| 13 |
"Tiny (Fastest)": "openai/whisper-tiny",
|
| 14 |
"Base (Faster)": "openai/whisper-base",
|
| 15 |
"Small (Balanced)": "openai/whisper-small",
|
| 16 |
+
# FIX: Remove redundant entry and keep only the general-purpose one
|
| 17 |
"Distil-Large-v3 (General Purpose)": "distil-whisper/distil-large-v3",
|
| 18 |
}
|
| 19 |
|
|
|
|
| 37 |
}
|
| 38 |
LANGUAGE_CHOICES = ["Auto-Detect"] + list(LANGUAGE_MAP.keys())
|
| 39 |
|
| 40 |
+
# --- CRUCIAL FIX: Initial Prompt for Transcription ---
|
| 41 |
+
# Adding an initial prompt greatly increases adherence to the target language.
|
| 42 |
+
FRENCH_INITIAL_PROMPT = "Ceci est une transcription en français." # "This is a transcription in French."
|
|
|
|
| 43 |
|
| 44 |
def get_model_pipeline(model_name, pipeline_type, progress):
|
| 45 |
"""
|
|
|
|
| 158 |
summarizer = get_model_pipeline("t5-small", "summarization", progress)
|
| 159 |
|
| 160 |
if target_language_code == "fr":
|
| 161 |
+
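# French summarization prompt.
# NOTE(review): t5-small's documented summarization prefix is the English "summarize:"; confirm that "résumer:" actually produces a French summary.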
|
| 162 |
prompt = f"résumer: {text}"
|
| 163 |
elif target_language_code == "es":
|
| 164 |
prompt = f"resumir: {text}"
|
| 165 |
else:
|
| 166 |
+
# Default English prompt (also used for Auto-Detect and any other language)
|
| 167 |
prompt = f"summarize: {text}"
|
| 168 |
|
| 169 |
summary = summarizer(
|
|
|
|
| 180 |
@spaces.GPU
|
| 181 |
def transcribe_and_export(audio_file, model_size, chunk_choice, selected_language, vtt_output, docx_timestamp_output, docx_no_timestamp_output, summarize_output, progress=gr.Progress()):
|
| 182 |
"""
|
| 183 |
+
Main function to transcribe audio and export. Includes initial_prompt fix for language adherence.
|
|
|
|
| 184 |
"""
|
| 185 |
if audio_file is None:
|
| 186 |
return (None, "", None, gr.Audio(value=None), "Please upload an audio file.")
|
|
|
|
| 221 |
if lang_code:
|
| 222 |
generate_kwargs["language"] = lang_code
|
| 223 |
|
| 224 |
+
# --- CRUCIAL FIX for large models (Distil-Large-v3, etc.) ---
|
| 225 |
if lang_code == "fr":
|
| 226 |
+
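# Add the initial prompt to reinforce French transcription.
# NOTE(review): Transformers' Whisper generate() takes `prompt_ids` (token IDs, e.g. from processor.get_prompt_ids(text)), not a raw "prompt" string; confirm this key is accepted by the pipeline.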
|
| 227 |
+
generate_kwargs["prompt"] = FRENCH_INITIAL_PROMPT
|
| 228 |
# -----------------------------------------------------------
|
| 229 |
|
| 230 |
# 3. Transcribe the segment
|
|
|
|
| 250 |
# 5. Generate Summary (if requested)
|
| 251 |
summary_text = ""
|
| 252 |
if summarize_output and transcribed_text:
|
| 253 |
+
# Pass the language code to the summary function for explicit prompting
|
| 254 |
summary_text = generate_summary(transcribed_text, lang_code, progress)
|
| 255 |
elif summarize_output and not transcribed_text:
|
| 256 |
summary_text = "Transcription failed or was empty, cannot generate summary."
|
|
|
|
| 310 |
)
|
| 311 |
|
| 312 |
language_selector = gr.Dropdown(
|
| 313 |
+
# Updated label to reflect the fix
|
| 314 |
label="Select Expected Language (Uses initial prompt for better French accuracy)",
|
| 315 |
choices=LANGUAGE_CHOICES,
|
| 316 |
value="French",
|