Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
import os
|
| 5 |
import sys
|
| 6 |
from google import genai
|
|
|
|
| 7 |
|
| 8 |
# ==============================================================================
|
| 9 |
# I. ASR Setup (Whisper)
|
|
@@ -137,6 +138,7 @@ def smart_summarize_and_merge(text_to_summarize: str) -> str:
|
|
| 137 |
|
| 138 |
final_prompt = f"""
|
| 139 |
You are a professional text summarizer. {prompt_type} into clear, comprehensive **Bullet Points**.
|
|
|
|
| 140 |
You must integrate all key points from all sections.
|
| 141 |
|
| 142 |
Language Instructions:
|
|
@@ -162,12 +164,13 @@ def smart_summarize_and_merge(text_to_summarize: str) -> str:
|
|
| 162 |
def full_pipeline(audio_path):
|
| 163 |
"""
|
| 164 |
Manages the full pipeline: ASR -> Correction -> Summarization.
|
|
|
|
| 165 |
"""
|
| 166 |
if asr_pipeline is None:
|
| 167 |
-
return ("Error loading
|
| 168 |
|
| 169 |
if audio_path is None:
|
| 170 |
-
return ("Please upload an audio file first.", "No
|
| 171 |
|
| 172 |
# 1. ASR Step: Convert audio to raw text
|
| 173 |
print(f"Step 1: Starting ASR for {audio_path}")
|
|
@@ -176,10 +179,10 @@ def full_pipeline(audio_path):
|
|
| 176 |
raw_asr_text = asr_result['text'].strip() if 'text' in asr_result else "Failed to extract text."
|
| 177 |
except Exception as e:
|
| 178 |
error_msg = f"An error occurred during the ASR process (Whisper): {e}"
|
| 179 |
-
return (error_msg, "No
|
| 180 |
|
| 181 |
-
if not raw_asr_text or raw_asr_text == "Failed to extract text.":
|
| 182 |
-
return ("
|
| 183 |
|
| 184 |
# 2. Correction Step: Correct the raw text
|
| 185 |
print(f"Step 2: Starting text correction for {len(raw_asr_text)} characters.")
|
|
@@ -194,19 +197,18 @@ def full_pipeline(audio_path):
|
|
| 194 |
|
| 195 |
print("Pipeline finished successfully.")
|
| 196 |
|
| 197 |
-
# Return
|
| 198 |
-
return
|
| 199 |
|
| 200 |
|
| 201 |
# ==============================================================================
|
| 202 |
# IV. Gradio UI Definition
|
| 203 |
# ==============================================================================
|
| 204 |
|
| 205 |
-
title = "
|
| 206 |
description = (
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
-
"3) Gemini summarizes the long text into bullet points (Formal Arabic, preserving English terms)."
|
| 210 |
)
|
| 211 |
|
| 212 |
gr.Interface(
|
|
@@ -216,12 +218,12 @@ gr.Interface(
|
|
| 216 |
label="Upload an audio file (WAV, MP3, etc.) or record directly"
|
| 217 |
),
|
| 218 |
outputs=[
|
| 219 |
-
|
| 220 |
-
gr.Textbox(label="
|
| 221 |
-
|
|
|
|
| 222 |
],
|
| 223 |
title=title,
|
| 224 |
description=description,
|
| 225 |
live=False
|
| 226 |
-
# Removed allow_flagging="never"
|
| 227 |
).launch()
|
|
|
|
| 4 |
import os
|
| 5 |
import sys
|
| 6 |
from google import genai
|
| 7 |
+
# Note: PyMuPDF is implicitly required in requirements.txt for handling large text chunks.
|
| 8 |
|
| 9 |
# ==============================================================================
|
| 10 |
# I. ASR Setup (Whisper)
|
|
|
|
| 138 |
|
| 139 |
final_prompt = f"""
|
| 140 |
You are a professional text summarizer. {prompt_type} into clear, comprehensive **Bullet Points**.
|
| 141 |
+
Use **round bullet points (•)** for the list items.
|
| 142 |
You must integrate all key points from all sections.
|
| 143 |
|
| 144 |
Language Instructions:
|
|
|
|
| 164 |
def full_pipeline(audio_path):
|
| 165 |
"""
|
| 166 |
Manages the full pipeline: ASR -> Correction -> Summarization.
|
| 167 |
+
Outputs only the Corrected Text and the Final Summary.
|
| 168 |
"""
|
| 169 |
if asr_pipeline is None:
|
| 170 |
+
return ("Error loading the Transcription model.", "No summary generated.")
|
| 171 |
|
| 172 |
if audio_path is None:
|
| 173 |
+
return ("Please upload an audio file first.", "No summary generated.")
|
| 174 |
|
| 175 |
# 1. ASR Step: Convert audio to raw text
|
| 176 |
print(f"Step 1: Starting ASR for {audio_path}")
|
|
|
|
| 179 |
raw_asr_text = asr_result['text'].strip() if 'text' in asr_result else "Failed to extract text."
|
| 180 |
except Exception as e:
|
| 181 |
error_msg = f"An error occurred during the ASR process (Whisper): {e}"
|
| 182 |
+
return (error_msg, "No summary generated.")
|
| 183 |
|
| 184 |
+
if not raw_asr_text or raw_asr_text == "Failed to extract meaningful text.":
|
| 185 |
+
return ("Transcription failed to extract meaningful text.", "No summary generated.")
|
| 186 |
|
| 187 |
# 2. Correction Step: Correct the raw text
|
| 188 |
print(f"Step 2: Starting text correction for {len(raw_asr_text)} characters.")
|
|
|
|
| 197 |
|
| 198 |
print("Pipeline finished successfully.")
|
| 199 |
|
| 200 |
+
# Return only Corrected Text and Summary (two outputs)
|
| 201 |
+
return corrected_text, clean_summary
|
| 202 |
|
| 203 |
|
| 204 |
# ==============================================================================
|
| 205 |
# IV. Gradio UI Definition
|
| 206 |
# ==============================================================================
|
| 207 |
|
| 208 |
+
title = "🎙️ Audio Transcription, Correction, and Smart Summarization Tool"
|
| 209 |
description = (
|
| 210 |
+
"Upload an audio file to automatically convert speech to text, correct spelling and punctuation errors, "
|
| 211 |
+
"and receive a comprehensive summary in bullet points (Formal Arabic, preserving English terms)."
|
|
|
|
| 212 |
)
|
| 213 |
|
| 214 |
gr.Interface(
|
|
|
|
| 218 |
label="Upload an audio file (WAV, MP3, etc.) or record directly"
|
| 219 |
),
|
| 220 |
outputs=[
|
| 221 |
+
# Output 1: Corrected Text
|
| 222 |
+
gr.Textbox(label="Corrected and Formatted Text", lines=8),
|
| 223 |
+
# Output 2: Final Summary
|
| 224 |
+
gr.Textbox(label="Final Comprehensive Summary", lines=8)
|
| 225 |
],
|
| 226 |
title=title,
|
| 227 |
description=description,
|
| 228 |
live=False
|
|
|
|
| 229 |
).launch()
|