Spaces:

clementBE
/

Audio_transcrib_base

Paused

App Files Files Community

clementBE committed on Sep 26, 2025

Commit

42e6d95

verified ·

1 Parent(s): e32fb4e

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -7

app.py CHANGED Viewed

@@ -20,7 +20,9 @@ MODEL_SIZES = {
 # Cache for loaded models
 model_cache = {}
 def get_model_pipeline(model_name, progress):
     if model_name not in model_cache:
         progress(0, desc="🚀 Initializing ZeroGPU instance...")
@@ -35,6 +37,16 @@ def get_model_pipeline(model_name, progress):
         progress(0.5, desc="✅ Model loaded successfully!")
     return model_cache[model_name]
 # --- Export Functions ---
 def create_vtt(segments, file_path):
     with open(file_path, "w", encoding="utf-8") as f:
@@ -64,9 +76,9 @@ def create_docx(segments, file_path, with_timestamps):
 # --- Main Transcription Function ---
 @spaces.GPU
-def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, docx_no_timestamp_output, progress=gr.Progress()):
     if file is None:
-        return (None, None, None, "Please upload an audio or video file.")
     start_time = time.time()
     ext = os.path.splitext(file)[1].lower()
@@ -85,6 +97,7 @@ def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, d
     pipe = get_model_pipeline(model_size, progress)
     progress(0.75, desc="🎤 Transcribing audio...")
     if model_size == "Distil-Large-v3-FR (French-Specific)":
         raw_output = pipe(audio_file_path, return_timestamps=True, generate_kwargs={"language": "fr"})
     else:
@@ -109,23 +122,33 @@ def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, d
         create_docx(segments, docx_no_ts_path, with_timestamps=False)
         outputs["DOCX (without timestamps)"] = docx_no_ts_path
     end_time = time.time()
     total_time = end_time - start_time
-    transcribed_text = raw_output['text']
     downloadable_files = [path for path in outputs.values()]
     status_message = f"✅ Transcription complete! Total time: {total_time:.2f} seconds."
     return (
         transcribed_text,
         gr.Files(value=downloadable_files, label="Download Transcripts"),
-        gr.Audio(value=None),
         status_message
     )
 # --- Gradio UI ---
 with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
     gr.Markdown("# 🎙️ Whisper ZeroGPU Transcription")
-    gr.Markdown("Transcribe audio or video files with timestamps and choose your output format.")
     with gr.Row():
         audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio/Video File")
@@ -140,17 +163,19 @@ with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
                 vtt_checkbox = gr.Checkbox(label="VTT", value=True)
                 docx_ts_checkbox = gr.Checkbox(label="DOCX (with timestamps)", value=False)
                 docx_no_ts_checkbox = gr.Checkbox(label="DOCX (without timestamps)", value=True)
             transcribe_btn = gr.Button("Transcribe", variant="primary")
             status_text = gr.Textbox(label="Status", interactive=False)
     transcription_output = gr.Textbox(label="Full Transcription", lines=10)
     downloadable_files_output = gr.Files(label="Download Transcripts")
     transcribe_btn.click(
         fn=transcribe_and_export,
-        inputs=[audio_input, model_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox],
-        outputs=[transcription_output, downloadable_files_output, audio_input, status_text]
     )
 if __name__ == "__main__":

 # Cache for loaded models
 model_cache = {}
+summary_cache = {}
+# --- Whisper pipeline loader ---
 def get_model_pipeline(model_name, progress):
     if model_name not in model_cache:
         progress(0, desc="🚀 Initializing ZeroGPU instance...")
         progress(0.5, desc="✅ Model loaded successfully!")
     return model_cache[model_name]
+# --- French summarization pipeline ---
+def get_summary_pipeline():
+    # Return the shared summarization pipeline, building it on first use.
+    # The instance is stored in the module-level `summary_cache` dict under
+    # the "summarizer" key, so later calls reuse it instead of constructing
+    # a new one.
+    if "summarizer" not in summary_cache:
+        # French-compatible summarization
+        # NOTE(review): `pipeline` is presumably transformers.pipeline; its
+        # import is not visible in this diff -- confirm.
+        summary_cache["summarizer"] = pipeline(
+            "summarization",
+            model="csebuetnlp/mT5_multilingual_XLSum"
+        )
+    return summary_cache["summarizer"]
 # --- Export Functions ---
 def create_vtt(segments, file_path):
     with open(file_path, "w", encoding="utf-8") as f:
 # --- Main Transcription Function ---
 @spaces.GPU
+def transcribe_and_export(file, model_size, vtt_output, docx_timestamp_output, docx_no_timestamp_output, generate_summary, progress=gr.Progress()):
     if file is None:
+        return (None, None, None, None, "Please upload an audio or video file.")
     start_time = time.time()
     ext = os.path.splitext(file)[1].lower()
     pipe = get_model_pipeline(model_size, progress)
     progress(0.75, desc="🎤 Transcribing audio...")
+    # Set French language if using French-specific model
     if model_size == "Distil-Large-v3-FR (French-Specific)":
         raw_output = pipe(audio_file_path, return_timestamps=True, generate_kwargs={"language": "fr"})
     else:
         create_docx(segments, docx_no_ts_path, with_timestamps=False)
         outputs["DOCX (without timestamps)"] = docx_no_ts_path
+    transcribed_text = raw_output['text']
+    # Generate summary if requested
+    summary_text = None
+    if generate_summary:
+        progress(0.95, desc="📝 Generating summary...")
+        summarizer = get_summary_pipeline()
+        summary_output = summarizer(transcribed_text, max_length=150, min_length=30, do_sample=False)
+        summary_text = summary_output[0]['summary_text']
     end_time = time.time()
     total_time = end_time - start_time
     downloadable_files = [path for path in outputs.values()]
     status_message = f"✅ Transcription complete! Total time: {total_time:.2f} seconds."
     return (
         transcribed_text,
         gr.Files(value=downloadable_files, label="Download Transcripts"),
+        audio_file_path,
+        summary_text,
         status_message
     )
 # --- Gradio UI ---
 with gr.Blocks(title="Whisper ZeroGPU Transcription") as demo:
     gr.Markdown("# 🎙️ Whisper ZeroGPU Transcription")
+    gr.Markdown("Transcribe audio or video files with timestamps, and optionally generate a French summary.")
     with gr.Row():
         audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio/Video File")
                 vtt_checkbox = gr.Checkbox(label="VTT", value=True)
                 docx_ts_checkbox = gr.Checkbox(label="DOCX (with timestamps)", value=False)
                 docx_no_ts_checkbox = gr.Checkbox(label="DOCX (without timestamps)", value=True)
+            summary_checkbox = gr.Checkbox(label="Generate Summary", value=False)
             transcribe_btn = gr.Button("Transcribe", variant="primary")
             status_text = gr.Textbox(label="Status", interactive=False)
     transcription_output = gr.Textbox(label="Full Transcription", lines=10)
     downloadable_files_output = gr.Files(label="Download Transcripts")
+    summary_output = gr.Textbox(label="Summary", lines=5)
     transcribe_btn.click(
         fn=transcribe_and_export,
+        inputs=[audio_input, model_selector, vtt_checkbox, docx_ts_checkbox, docx_no_ts_checkbox, summary_checkbox],
+        outputs=[transcription_output, downloadable_files_output, audio_input, summary_output, status_text]
     )
 if __name__ == "__main__":