import gradio as gr
import torch
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from moviepy.video.io.VideoFileClip import VideoFileClip
import librosa
import soundfile as sf
import numpy as np
import re
import os
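# Gradio app: transcribe Arabic audio/video with Whisper, then summarize the
# transcript with an Arabic BART model.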
# ======================
# 1. Basic settings
# ======================
device = "cuda" if torch.cuda.is_available() else "cpu"
# ======================
# 2. Load the models
# ======================
# Whisper for speech recognition; Arabic BART for summarization
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=0 if device == "cuda" else -1)
bart_model = AutoModelForSeq2SeqLM.from_pretrained("ahmedabdo/arabic-summarizer-bart").to(device)
bart_tokenizer = AutoTokenizer.from_pretrained("ahmedabdo/arabic-summarizer-bart")
# ======================
# 3. Helper functions
# ======================
def clean_text(text):
    # Collapse repeated whitespace and trim the ends
    return re.sub(r'\s+', ' ', text).strip()
def convert_audio_to_text(uploaded_file):
    try:
        if not uploaded_file:
            return "⛔ الرجاء رفع ملف أولاً"
        input_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        output_path = "/tmp/processed.wav"
        # For video files, extract the audio track to a WAV file first
        if input_path.split('.')[-1].lower() in ['mp4', 'avi', 'mov', 'mkv']:
            video = VideoFileClip(input_path)
            if video.audio:
                video.audio.write_audiofile(output_path, codec='pcm_s16le')
            else:
                return "⛔ لا يوجد صوت في الفيديو!"
        else:
            output_path = input_path
        audio, rate = librosa.load(output_path, sr=16000)
        transcripts = []
        # Split the audio into 30-second segments to handle long files
        for start in np.arange(0, len(audio) / rate, 30):
            end = min(start + 30, len(audio) / rate)
            segment = audio[int(start * rate):int(end * rate)]
            sf.write(f"/tmp/segment_{int(start)}.wav", segment, rate)
            transcripts.append(pipe(f"/tmp/segment_{int(start)}.wav")["text"])
        return " ".join(transcripts)
    except Exception as e:
        return f"⛔ خطأ: {str(e)}"
def summarize_text(text):
    cleaned_text = clean_text(text)
    inputs = bart_tokenizer(cleaned_text, return_tensors="pt", max_length=1024, truncation=True).to(device)
    # Generate the summary with beam search (up to 150 tokens)
    summary_ids = bart_model.generate(
        inputs.input_ids,
        max_length=150,
        num_beams=4,
        early_stopping=True
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# ======================
# 4. Demo example
# ======================
EXAMPLE_AUDIO_PATH = "AUDIO-2025-02-24-22-10-37.mp3"
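# The sample clip is expected to sit next to app.py; process_example_audio
# returns an error message if it is missing.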
def process_example_audio():
    if not os.path.exists(EXAMPLE_AUDIO_PATH):
        return "⛔ الملف التجريبي غير موجود"
    return convert_audio_to_text(EXAMPLE_AUDIO_PATH)
# ======================
# 5. User interface
# ======================
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 استخراج النص وتلخيصه")
    gr.Markdown("### 🔊 مثال تجريبي")
    gr.Audio(EXAMPLE_AUDIO_PATH)
    example_btn = gr.Button("تجربة المثال ⚡", elem_classes="custom-button")
    file_input = gr.File(file_types=[".wav", ".mp3", ".mp4"])
    extract_btn = gr.Button("استخراج النص", elem_classes="custom-button")
    extracted_text = gr.Textbox(label="📝 النص المستخرج", lines=8)
    summarize_btn = gr.Button("تلخيص النص", elem_classes="custom-button")
    summary_output = gr.Textbox(label="📌 الملخص", lines=6)
    # Wire the buttons to their callbacks
    extract_btn.click(convert_audio_to_text, inputs=file_input, outputs=extracted_text)
    summarize_btn.click(summarize_text, inputs=extracted_text, outputs=summary_output)
    example_btn.click(process_example_audio, outputs=extracted_text)
if __name__ == "__main__":
    demo.launch()