import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM
import torch
import os
from huggingface_hub import login

# Log in to the Hugging Face Hub (token read from the Space secrets)
login(token=os.environ.get('HUGGING_FACE_HUB_TOKEN'))

# Load the first model (the specialist Arabic philosophy summarizer)
specialist_model = AutoModelForSeq2SeqLM.from_pretrained("methodya/arabic-summarizer-philosophy")
specialist_tokenizer = AutoTokenizer.from_pretrained("methodya/arabic-summarizer-philosophy")

# Load the second model (Gemma)
gemma_model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it")
gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

device = 'cuda' if torch.cuda.is_available() else 'cpu'
specialist_model = specialist_model.to(device)
gemma_model = gemma_model.to(device)
def generate_summary(text, use_pipeline=True, max_length=150, num_beams=7, length_penalty=0.8):
    if use_pipeline:
        # Stage 1: summarize with the specialist model
        inputs = specialist_tokenizer(text, return_tensors="pt", max_length=2048, truncation=True).to(device)
        specialist_outputs = specialist_model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            length_penalty=length_penalty,
            early_stopping=True
        )
        first_summary = specialist_tokenizer.decode(specialist_outputs[0], skip_special_tokens=True)

        # Stage 2: refine the summary with Gemma.
        # The Arabic prompt asks Gemma to revise and improve the summary while keeping the
        # main points, formatted as: 1) central idea, 2) main points, 3) important relationships.
        prompt = f"""راجع وحسن هذا الملخص مع الحفاظ على النقاط الرئيسية:
الملخص الأولي:
{first_summary}
قدم التحسين بالشكل التالي:
1. الفكرة المحورية
2. النقاط الرئيسية
3. العلاقات المهمة
"""
        inputs = gemma_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
        final_outputs = gemma_model.generate(
            **inputs,
            max_new_tokens=max_length,  # cap the generated tokens; max_length would also count the prompt
            do_sample=False  # greedy decoding; a temperature setting has no effect without sampling
        )
        # Decode only the newly generated tokens, not the echoed prompt
        new_tokens = final_outputs[0][inputs["input_ids"].shape[-1]:]
        return gemma_tokenizer.decode(new_tokens, skip_special_tokens=True)
    else:
        # Use the specialist model only
        inputs = specialist_tokenizer(text, return_tensors="pt", max_length=2048, truncation=True).to(device)
        outputs = specialist_model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            length_penalty=length_penalty,
            early_stopping=True
        )
        return specialist_tokenizer.decode(outputs[0], skip_special_tokens=True)
css = """
.gradio-container {background: #f9fafb !important}
.rtl-text { direction: rtl; text-align: right; }
"""

interface = gr.Interface(
    fn=generate_summary,
    inputs=[
        gr.Textbox(lines=8, label="النص", elem_classes="rtl-text"),  # "Text"
        gr.Checkbox(label="استخدام المعالجة المزدوجة", value=True),  # "Use dual-stage processing"
        gr.Slider(50, 250, value=150, label="طول الملخص"),  # "Summary length"
        gr.Slider(1, 10, value=7, step=1, label="دقة التلخيص"),  # "Summarization accuracy" (beam count)
        gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="معامل الطول")  # "Length penalty"
    ],
    outputs=gr.Textbox(label="الملخص", elem_classes="rtl-text"),  # "Summary"
    title="ملخص النصوص الفلسفية (نظام مدمج)",  # "Philosophical Text Summarizer (combined system)"
    theme=gr.themes.Soft(),
    css=css
)

interface.launch()
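As a quick sanity check outside the Gradio UI, generate_summary can also be called directly, for example from a Python session in the same environment before launching the interface. This is only a sketch; the sample text and argument values below are placeholders, not part of the Space.

# Sketch: direct calls to generate_summary, bypassing the Gradio UI (placeholder input)
sample_text = "النص الفلسفي المراد تلخيصه"
# Single-stage summary (specialist model only)
print(generate_summary(sample_text, use_pipeline=False, max_length=120))
# Two-stage summary (specialist model followed by Gemma refinement)
print(generate_summary(sample_text, use_pipeline=True, max_length=150))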