Spaces:
Build error
Build error
| import gradio as gr | |
| import torch | |
| from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer | |
| from moviepy.video.io.VideoFileClip import VideoFileClip | |
| import librosa | |
| import soundfile as sf | |
| import numpy as np | |
| import re | |
| import os | |
| # ====================== | |
| # 1. الإعدادات الأساسية | |
| # ====================== | |
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
| # ====================== | |
| # 2. تحميل النماذج | |
| # ====================== | |
# Speech-recognition pipeline (Whisper). The pipeline API takes a CUDA device
# index, or -1 to stay on the CPU.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    device=0 if device == "cuda" else -1,
)

# Summarization model and its tokenizer, loaded from the same checkpoint.
_SUMMARIZER_MODEL_ID = "ahmedabdo/arabic-summarizer-bart"
# Place the summarizer on `device` so generation runs on the GPU when one is
# available and the model's weights share a device with the input tensors
# built in summarize_text().
bart_model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_MODEL_ID).to(device)
bart_tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_MODEL_ID)
| # ====================== | |
| # 3. الدوال المساعدة | |
| # ====================== | |
def clean_text(text):
    """Collapse each run of whitespace into one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def convert_audio_to_text(uploaded_file):
    """Transcribe an uploaded audio or video file.

    Args:
        uploaded_file: A filesystem path (str), or an object that exposes the
            path through a ``name`` attribute.

    Returns:
        The transcripts of consecutive 30-second segments joined with single
        spaces. When no file is given, the video has no audio track, or any
        processing step raises, a message starting with "⛔" is returned
        instead of raising.
    """
    import tempfile  # only this function needs it

    if not uploaded_file:
        return "⛔ الرجاء رفع ملف أولاً"

    try:
        input_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        extension = os.path.splitext(input_path)[1].lower()

        # All intermediate files live in a per-call directory, so concurrent
        # requests cannot overwrite each other's files and nothing is left
        # behind once the call returns.
        with tempfile.TemporaryDirectory() as work_dir:
            audio_path = input_path

            # Video files: extract the audio track to a WAV file first.
            if extension in ('.mp4', '.avi', '.mov', '.mkv'):
                audio_path = os.path.join(work_dir, "processed.wav")
                video = VideoFileClip(input_path)
                try:
                    if not video.audio:
                        return "⛔ لا يوجد صوت في الفيديو!"
                    video.audio.write_audiofile(audio_path, codec='pcm_s16le')
                finally:
                    # Release the clip's readers even on the early return.
                    video.close()

            audio, rate = librosa.load(audio_path, sr=16000)

            # Split the audio into 30-second segments to cope with long files.
            # Integer sample offsets avoid float rounding at the boundaries.
            samples_per_segment = int(30 * rate)
            transcripts = []
            for index, offset in enumerate(range(0, len(audio), samples_per_segment)):
                segment_path = os.path.join(work_dir, f"segment_{index}.wav")
                sf.write(segment_path, audio[offset:offset + samples_per_segment], rate)
                transcripts.append(pipe(segment_path)["text"])

        return " ".join(transcripts)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"⛔ خطأ: {str(e)}"
def summarize_text(text):
    """Summarize text with the BART summarization model.

    Args:
        text: The text to summarize. Whitespace is normalized with
            clean_text() before tokenization, and the input is truncated to
            1024 tokens.

    Returns:
        The decoded summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to summarize: skip the model instead of generating from empty input.
    if not text or not text.strip():
        return ""

    cleaned_text = clean_text(text)
    inputs = bart_tokenizer(cleaned_text, return_tensors="pt", max_length=1024, truncation=True)
    # Follow the model's actual device so inputs and weights always match.
    inputs = inputs.to(bart_model.device)
    summary_ids = bart_model.generate(
        inputs.input_ids,
        attention_mask=inputs.get("attention_mask"),
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
| # ====================== | |
| # 4. المثال التجريبي | |
| # ====================== | |
EXAMPLE_AUDIO_PATH = "AUDIO-2025-02-24-22-10-37.mp3"


def process_example_audio():
    """Transcribe the bundled example recording.

    Returns:
        The result of convert_audio_to_text() for EXAMPLE_AUDIO_PATH, or an
        error message when that file does not exist.
    """
    if os.path.exists(EXAMPLE_AUDIO_PATH):
        return convert_audio_to_text(EXAMPLE_AUDIO_PATH)
    return "⛔ الملف التجريبي غير موجود"
| # ====================== | |
| # 5. واجهة المستخدم | |
| # ====================== | |
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 استخراج النص وتلخيصه")

    # Example section. The player is only given the sample clip when the file
    # is present, so a missing sample cannot break interface construction;
    # process_example_audio() reports the missing file when the button is used.
    gr.Markdown("### 🔊 مثال تجريبي")
    gr.Audio(EXAMPLE_AUDIO_PATH if os.path.exists(EXAMPLE_AUDIO_PATH) else None)
    example_btn = gr.Button("تجربة المثال ⚡", elem_classes="custom-button")

    # Upload, transcription and summary widgets. The upload filter lists every
    # extension convert_audio_to_text() has a branch for.
    file_input = gr.File(file_types=[".wav", ".mp3", ".mp4", ".avi", ".mov", ".mkv"])
    extract_btn = gr.Button("استخراج النص", elem_classes="custom-button")
    extracted_text = gr.Textbox(label="📝 النص المستخرج", lines=8)
    summarize_btn = gr.Button("تلخيص النص", elem_classes="custom-button")
    summary_output = gr.Textbox(label="📌 الملخص", lines=6)

    # Event wiring: both transcription buttons fill the same textbox, which is
    # also the input of the summarizer.
    extract_btn.click(convert_audio_to_text, inputs=file_input, outputs=extracted_text)
    summarize_btn.click(summarize_text, inputs=extracted_text, outputs=summary_output)
    example_btn.click(process_example_audio, outputs=extracted_text)

if __name__ == "__main__":
    demo.launch()