import os
import re

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from moviepy.video.io.VideoFileClip import VideoFileClip
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# ======================
# 1. Basic settings
# ======================
device = "cuda" if torch.cuda.is_available() else "cpu"

# ======================
# 2. Load models
# ======================
# Whisper ASR pipeline; device=0 selects the first GPU, -1 forces CPU.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    device=0 if device == "cuda" else -1,
)
bart_model = AutoModelForSeq2SeqLM.from_pretrained("ahmedabdo/arabic-summarizer-bart")
bart_tokenizer = AutoTokenizer.from_pretrained("ahmedabdo/arabic-summarizer-bart")
# FIX: the model must live on the same device as the tokenized inputs
# (summarize_text moves them with .to(device)); without this, generate()
# raises a device-mismatch error on CUDA machines.
bart_model = bart_model.to(device)

# ======================
# 3. Helper functions
# ======================
def clean_text(text):
    """Collapse all whitespace runs to single spaces and strip the ends."""
    return re.sub(r'\s+', ' ', text).strip()


def convert_audio_to_text(uploaded_file):
    """Transcribe an uploaded audio or video file to text with Whisper.

    Accepts either a filesystem path (str) or a Gradio file object
    (anything with a ``.name`` path attribute). Video containers have
    their audio track extracted to a temporary WAV first. Long audio is
    processed in 30-second chunks so large files stay within memory.

    Returns the joined transcript, or an Arabic error message string on
    failure (the Gradio UI displays whatever string comes back).
    """
    try:
        if not uploaded_file:
            return "⛔ الرجاء رفع ملف أولاً"
        input_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        output_path = "/tmp/processed.wav"

        # Extract the audio track from common video containers.
        if input_path.split('.')[-1].lower() in ['mp4', 'avi', 'mov', 'mkv']:
            video = VideoFileClip(input_path)
            try:
                if video.audio:
                    video.audio.write_audiofile(output_path, codec='pcm_s16le')
                else:
                    return "⛔ لا يوجد صوت في الفيديو!"
            finally:
                # FIX: release the underlying ffmpeg reader processes;
                # the original leaked them on every video upload.
                video.close()
        else:
            output_path = input_path

        # Whisper expects 16 kHz input; librosa resamples on load.
        audio, rate = librosa.load(output_path, sr=16000)
        duration = len(audio) / rate
        transcripts = []

        # Split the audio into 30-second chunks and transcribe each one.
        for idx, start in enumerate(np.arange(0, duration, 30)):
            end = min(start + 30, duration)
            segment = audio[int(start * rate):int(end * rate)]
            segment_path = f"/tmp/segment_{idx}.wav"
            sf.write(segment_path, segment, rate)
            try:
                transcripts.append(pipe(segment_path)["text"])
            finally:
                # FIX: remove the temporary chunk file instead of
                # accumulating stale WAVs in /tmp across requests.
                if os.path.exists(segment_path):
                    os.remove(segment_path)
        return " ".join(transcripts)
    except Exception as e:
        # Surface the error message in the UI rather than crashing Gradio.
        return f"⛔ خطأ: {str(e)}"


def summarize_text(text):
    """Summarize Arabic text with the BART summarizer.

    The input is whitespace-normalized, tokenized (truncated to the
    model's 1024-token limit) and decoded with beam search.
    """
    cleaned_text = clean_text(text)
    if not cleaned_text:
        # Robustness: nothing to summarize (empty / whitespace-only input).
        return ""
    inputs = bart_tokenizer(
        cleaned_text, return_tensors="pt", max_length=1024, truncation=True
    ).to(device)
    summary_ids = bart_model.generate(
        inputs.input_ids,
        # FIX: pass the attention mask so padding/truncation boundaries
        # are respected instead of letting the model guess them.
        attention_mask=inputs.attention_mask,
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# ======================
# 4. Demo example
# ======================
EXAMPLE_AUDIO_PATH = "AUDIO-2025-02-24-22-10-37.mp3"


def process_example_audio():
    """Transcribe the bundled demo audio file, if it exists on disk."""
    if not os.path.exists(EXAMPLE_AUDIO_PATH):
        return "⛔ الملف التجريبي غير موجود"
    return convert_audio_to_text(EXAMPLE_AUDIO_PATH)

# ======================
# 5. User interface
# ======================
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 استخراج النص وتلخيصه")
    gr.Markdown("### 🔊 مثال تجريبي")
    gr.Audio(EXAMPLE_AUDIO_PATH)
    example_btn = gr.Button("تجربة المثال ⚡", elem_classes="custom-button")
    file_input = gr.File(file_types=[".wav", ".mp3", ".mp4"])
    extract_btn = gr.Button("استخراج النص", elem_classes="custom-button")
    extracted_text = gr.Textbox(label="📝 النص المستخرج", lines=8)
    summarize_btn = gr.Button("تلخيص النص", elem_classes="custom-button")
    summary_output = gr.Textbox(label="📌 الملخص", lines=6)

    extract_btn.click(convert_audio_to_text, inputs=file_input, outputs=extracted_text)
    summarize_btn.click(summarize_text, inputs=extracted_text, outputs=summary_output)
    example_btn.click(process_example_audio, outputs=extracted_text)

if __name__ == "__main__":
    demo.launch()