Spaces:

GannaEslam38
/

whisper_code-switching

Running

App Files Files Community

GannaEslam38 committed 10 days ago

Commit

dd0312e

verified ·

1 Parent(s): f2b75ac

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import gradio as gr
 import os
 import sys
 from google import genai
 # ==============================================================================
 # I. ASR Setup (Whisper)
@@ -137,6 +138,7 @@ def smart_summarize_and_merge(text_to_summarize: str) -> str:
     final_prompt = f"""
     You are a professional text summarizer. {prompt_type} into clear, comprehensive **Bullet Points**.
     You must integrate all key points from all sections.
     Language Instructions:
@@ -162,12 +164,13 @@ def smart_summarize_and_merge(text_to_summarize: str) -> str:
 def full_pipeline(audio_path):
     """
     Manages the full pipeline: ASR -> Correction -> Summarization.
     """
     if asr_pipeline is None:
-        return ("Error loading Whisper model.", "No correction applied.", "No summary generated.")
     if audio_path is None:
-        return ("Please upload an audio file first.", "No correction applied.", "No summary generated.")
     # 1. ASR Step: Convert audio to raw text
     print(f"Step 1: Starting ASR for {audio_path}")
@@ -176,10 +179,10 @@ def full_pipeline(audio_path):
         raw_asr_text = asr_result['text'].strip() if 'text' in asr_result else "Failed to extract text."
     except Exception as e:
         error_msg = f"An error occurred during the ASR process (Whisper): {e}"
-        return (error_msg, "No correction applied.", "No summary generated.")
-    if not raw_asr_text or raw_asr_text == "Failed to extract text.":
-        return ("Whisper failed to extract meaningful text.", "No correction applied.", "No summary generated.")
     # 2. Correction Step: Correct the raw text
     print(f"Step 2: Starting text correction for {len(raw_asr_text)} characters.")
@@ -194,19 +197,18 @@ def full_pipeline(audio_path):
     print("Pipeline finished successfully.")
-    # Return three outputs: Raw Text, Corrected Text, Summary
-    return raw_asr_text, corrected_text, clean_summary
 # ==============================================================================
 # IV. Gradio UI Definition
 # ==============================================================================
-title = "✨ Bilingual Smart Summarization Tool (ASR + Gemini Pipeline)"
 description = (
-    "A fully integrated pipeline: 1) Whisper Code-Switching model converts audio to raw text, "
-    "2) Gemini corrects spelling and punctuation errors, "
-    "3) Gemini summarizes the long text into bullet points (Formal Arabic, preserving English terms)."
 )
 gr.Interface(
@@ -216,12 +218,12 @@ gr.Interface(
         label="Upload an audio file (WAV, MP3, etc.) or record directly"
     ),
     outputs=[
-        gr.Textbox(label="1. Raw Text Extracted by Whisper", lines=5),
-        gr.Textbox(label="2. Corrected and Formatted Text (Ready for Summarization)", lines=5),
-        gr.Textbox(label="3. Final Comprehensive Summary", lines=5)
     ],
     title=title,
     description=description,
     live=False
-    # allow_flagging="never" was removed
 ).launch()

 import os
 import sys
 from google import genai
+# Note: PyMuPDF is implicitly required in requirements.txt for handling large text chunks.
 # ==============================================================================
 # I. ASR Setup (Whisper)
     final_prompt = f"""
     You are a professional text summarizer. {prompt_type} into clear, comprehensive **Bullet Points**.
+    Use **round bullet points (•)** for the list items.
     You must integrate all key points from all sections.
     Language Instructions:
 def full_pipeline(audio_path):
     """
     Manages the full pipeline: ASR -> Correction -> Summarization.
+    Outputs only the Corrected Text and the Final Summary.
     """
     if asr_pipeline is None:
+        return ("Error loading the Transcription model.", "No summary generated.")
     if audio_path is None:
+        return ("Please upload an audio file first.", "No summary generated.")
     # 1. ASR Step: Convert audio to raw text
     print(f"Step 1: Starting ASR for {audio_path}")
         raw_asr_text = asr_result['text'].strip() if 'text' in asr_result else "Failed to extract text."
     except Exception as e:
         error_msg = f"An error occurred during the ASR process (Whisper): {e}"
+        return (error_msg, "No summary generated.")
+    if not raw_asr_text or raw_asr_text == "Failed to extract meaningful text.":
+        return ("Transcription failed to extract meaningful text.", "No summary generated.")
     # 2. Correction Step: Correct the raw text
     print(f"Step 2: Starting text correction for {len(raw_asr_text)} characters.")
     print("Pipeline finished successfully.")
+    # Return only Corrected Text and Summary (two outputs)
+    return corrected_text, clean_summary
 # ==============================================================================
 # IV. Gradio UI Definition
 # ==============================================================================
+title = "🎙️ Audio Transcription, Correction, and Smart Summarization Tool"
 description = (
+    "Upload an audio file to automatically convert speech to text, correct spelling and punctuation errors, "
+    "and receive a comprehensive summary in bullet points (Formal Arabic, preserving English terms)."
 )
 gr.Interface(
         label="Upload an audio file (WAV, MP3, etc.) or record directly"
     ),
     outputs=[
+        # Output 1: Corrected Text
+        gr.Textbox(label="Corrected and Formatted Text", lines=8),
+        # Output 2: Final Summary
+        gr.Textbox(label="Final Comprehensive Summary", lines=8)
     ],
     title=title,
     description=description,
     live=False
 ).launch()