Spaces:

YussefGAFeer
/

vibethinker-api

Sleeping

App Files Files Community

YussefGAFeer committed on 23 days ago

Commit

8c3fd4f

verified ·

1 Parent(s): c049bd7

Update app.py

Browse files

Files changed (1) hide show

app.py +259 -189

app.py CHANGED Viewed

@@ -1,215 +1,285 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
-import torch
-import gc
 # =============================================================================
-# إعداد النموذج (يحمّل مرة واحدة فقط عند بدء الخادم)
 # =============================================================================
-MODEL_ID = "WeiboAI/VibeThinker-1.5B"
-print(f"✅ [إعداد]: جاري تحميل النموذج {MODEL_ID}...")
-print("    - هذا قد يستغرق 1-3 دقائق أول مرة...")
 try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
     )
-    print("✅ [جاهز]: تم تحميل النموذج بنجاح!")
 except Exception as e:
-    print(f"❌ [خطأ]: فشل تحميل النموذج: {e}")
-    raise
-# =============================================================================
-# دوال التوليد
-# =============================================================================
-def generate_response(prompt, temperature=0.6, max_tokens=2048):
-    """توليد إجابة من النموذج"""
     try:
-        messages = [{"role": "user", "content": prompt}]
-        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer([text], return_tensors="pt").to(model.device)
-        generation_config = {
-            "max_new_tokens": max_tokens,
-            "do_sample": True,
-            "temperature": temperature,
-            "top_p": 0.95,
-            "top_k": None
-        }
-        with torch.no_grad():
-            outputs = model.generate(**inputs, generation_config=GenerationConfig(**generation_config))
-        response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-        # استخراج الإجابة فقط (إزالة Prompt)
-        response = response.split("assistant\n")[-1].strip()
-        return response
     except Exception as e:
-        return f"❌ حدث خطأ: {str(e)}"
-def clear_memory():
-    """تنظيف الذاكرة"""
-    gc.collect()
-    torch.cuda.empty_cache()
-    return "✅ تم تنظيف الذاكرة"
-# =============================================================================
-# واجهة Gradio
-# =============================================================================
-css = """
-/* تنسيق مشابه للـ Colab */
-body { background: linear-gradient(135deg, #1a1a2e, #16213e); color: white; }
-.gradio-container { background: rgba(0,0,0,0.3); border-radius: 15px; }
 """
-with gr.Blocks(title="VibeThinker API | HF Space", css=css, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    # 🚀 VibeThinker-1.5B API على Hugging Face Space
-    **نموذج التفكير المنطقي والرياضيات من WeiboAI**
-    ⚡ **النموذج**: `WeiboAI/VibeThinker-1.5B`
-    🔗 **المصدر**: [Hugging Face Hub](https://huggingface.co/WeiboAI/VibeThinker-1.5B)
-    📄 **الترخيص**: MIT (مجاني للاستخدام التجاري)
-    """)
     with gr.Row():
-        with gr.Column(scale=4):
-            chatbot = gr.Chatbot(
-                label="💬 محادثة",
-                height=500,
-                bubble_full_width=False,
-                avatar_images=(None, "https://huggingface.co/WeiboAI/VibeThinker-1.5B/resolve/main/logo.png")
-            )
-            with gr.Row():
-                prompt_input = gr.Textbox(
-                    label="اكتب سؤالك هنا...",
-                    placeholder="أدخل مسألة رياضية أو سؤال منطقي...",
-                    lines=2,
-                    scale=8
-                )
-                submit_btn = gr.Button("🚀 إرسال", scale=1, variant="primary")
-            with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
-                temperature = gr.Slider(0.1, 1.5, value=0.6, label="Temperature", info="درجة الإبداع")
-                max_tokens = gr.Slider(512, 8192, value=2048, label="Max Tokens", info="أقصى طول للإجابة")
-                clear_btn = gr.Button("🗑️ مسح الذاكرة", variant="stop")
-        with gr.Column(scale=1):
-            gr.Markdown("### 📋 معلومات")
-            gr.Markdown("""
-            - **سرعة الاستجابة**: ~2-5 ثانية/سؤال
-            - **دعم اللغة**: الإنجليزية (المهام الرياضية)
-            - **استخدام الذاكرة**: ~4GB VRAM
-            """)
-            status_box = gr.Textbox(
-                label="حالة النظام",
-                value="✅ النموذج جاهز",
-                interactive=False
-            )
-    # =============================================================================
-    # أحداث التحكم
-    # =============================================================================
-    def chat_function(message, history, temp, max_tok):
-        """معالجة المحادثة"""
-        # بناء السياق من المحادثة السابقة
-        full_prompt = ""
-        for human, assistant in history:
-            full_prompt += f"Human: {human}\nAssistant: {assistant}\n"
-        full_prompt += f"Human: {message}\nAssistant: "
-        response = generate_response(full_prompt, temp, max_tok)
-        return response
-    def handle_submit(prompt, history, temp, max_tok):
-        """معالجة إرسال السؤال"""
-        if not prompt.strip():
-            return "", history
-        # إضافة السؤال للمحادثة
-        history.append([prompt, None])
-        # توليد الإجابة
-        response = chat_function(prompt, history[:-1], temp, max_tok)
-        # تحديث المحادثة بالإجابة
-        history[-1][1] = response
-        # تنظيف الذاكرة بعد كل استجابة
-        clear_memory()
-        return "", history
-    # ربط الأحداث
-    submit_btn.click(
-        handle_submit,
-        inputs=[prompt_input, chatbot, temperature, max_tokens],
-        outputs=[prompt_input, chatbot]
-    )
-    prompt_input.submit(
-        handle_submit,
-        inputs=[prompt_input, chatbot, temperature, max_tokens],
-        outputs=[prompt_input, chatbot]
-    )
-    clear_btn.click(
-        clear_memory,
-        outputs=[status_box]
-    )
-    # =============================================================================
-    # معلومات مخصصة لـ API
-    # =============================================================================
     gr.Markdown("""
     ---
-    ### 🔌 **استخدام كـ API**
-    يمكنك استخدام هذا Space كـ API خارجي:
-    **Endpoint**: `https://YOUR-SPACE-NAME.hf.space/v1/chat/completions`
-    **Headers**:
-    ```json
-    {
-      "Content-Type": "application/json"
-    }
-    ```
-    **Body**:
-    ```json
-    {
-      "model": "VibeThinker-1.5B",
-      "messages": [{"role": "user", "content": "مسألتك"}],
-      "temperature": 0.6
-    }
-    ```
     """)
-# =============================================================================
-# تشغيل التطبيق
-# =============================================================================
-demo.queue(max_size=20).launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    share=False,  # لا تحتاج لـ share لأن Space عام
-    show_api=True,  # إظهار وثائق API تلقائياً
-    show_error=True
-)

 # =============================================================================
+# كود Ollama لـ Hugging Face Space مع Ngrok
+# نسخة محسّنة ومستقرة
 # =============================================================================
+import sys
+import subprocess
+import time
+import os
+import signal
+import threading
+import gradio as gr
+# -----------------------------------------------------------------------------
+# Part 1: install the required libraries and Ollama
+# -----------------------------------------------------------------------------
+# Everything in this section runs at import time; any failure prints a fatal
+# error and terminates the process with exit status 1.
+print("✅ [الخطوة 1/5]: تثبيت المكتبات الضرورية و Ollama...")
+sys.stdout.flush()
+# Install pyngrok with pip, using the same interpreter that runs this script.
+print("    - تثبيت pyngrok...", end="")
+sys.stdout.flush()
 try:
+    subprocess.run([sys.executable, '-m', 'pip', 'install', 'pyngrok', '-q'], check=True)
+    print(" تم.")
+    sys.stdout.flush()
+except subprocess.CalledProcessError as e:
+    print(f"\n[❌ خطأ فادح]: فشل تثبيت pyngrok. رمز الخروج: {e.returncode}")
+    sys.exit(1)
+# Imported here rather than at the top of the file, after the pip install
+# above — presumably because pyngrok may not be installed before that step.
+from pyngrok import ngrok
+# Install Ollama with the official install script.
+print("    - تثبيت Ollama بالطريقة الرسمية...")
+sys.stdout.flush()
+try:
+    # The command is a fixed string (no external input) run through a shell so
+    # that the curl output can be piped into sh.
+    install_command = "curl -fsSL https://ollama.com/install.sh | sh"
+    # Output is captured, so the installer is silent unless it fails; on
+    # failure its stderr is printed below.
+    subprocess.run(install_command, shell=True, check=True, capture_output=True, text=True)
+    print("    - تم تثبيت Ollama بنجاح.")
+    sys.stdout.flush()
+except subprocess.CalledProcessError as e:
+    print(f"\n[❌ خطأ فادح]: فشل تثبيت Ollama. رمز الخروج: {e.returncode}")
+    print(e.stderr)
+    sys.exit(1)
+except Exception as e:
+    # Anything other than a non-zero exit status (e.g. the shell could not be
+    # started) is also treated as fatal.
+    print(f"\n[❌ خطأ فادح]: حدث خطأ غير متوقع أثناء تثبيت Ollama. الخطأ: {e}")
+    sys.exit(1)
+print("✅ [الخطوة 1/5]: تم تثبيت المكتبات و Ollama بنجاح!\n")
+sys.stdout.flush()
+# -----------------------------------------------------------------------------
+# Part 2: configure ngrok and start the Ollama server
+# -----------------------------------------------------------------------------
+print("✅ [الخطوة 2/5]: إعداد Ngrok وتشغيل خادم Ollama...")
+sys.stdout.flush()
+# The token is read from the environment (e.g. a Hugging Face Space secret) so
+# that it is never hard-coded in the repository.
+NGROK_AUTH_TOKEN = os.getenv("NGROK_AUTH_TOKEN", "")
+try:
+    if not NGROK_AUTH_TOKEN:
+        raise ValueError("⚠️ يرجى إضافة NGROK_AUTH_TOKEN في إعدادات Secrets للـ Space")
+    print("    - إعداد Ngrok Auth Token...", end="")
+    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
+    print(" تم.")
+    sys.stdout.flush()
+except Exception as e:
+    print(f"\n[❌ خطأ فادح]: فشل إعداد Ngrok Auth Token. الخطأ: {e}")
+    sys.exit(1)
+# Start the Ollama server in the background.
+print("    - تشغيل خادم Ollama في الخلفية...", end="")
+sys.stdout.flush()
+os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'
+# Server output is sent to a log file rather than to subprocess.PIPE: a pipe
+# that nobody drains eventually fills up and blocks the server, and reading a
+# pipe of a still-running process blocks until it exits.
+OLLAMA_LOG_PATH = os.getenv("OLLAMA_LOG_PATH", "/tmp/ollama_serve.log")
+# Maximum time (seconds) to wait for the server to answer `ollama list`.
+OLLAMA_STARTUP_TIMEOUT = 60
+ollama_serve_process = None
+try:
+    with open(OLLAMA_LOG_PATH, "wb") as ollama_log:
+        ollama_serve_process = subprocess.Popen(
+            ['ollama', 'serve'],
+            stdout=ollama_log,
+            stderr=subprocess.STDOUT,
+            # Own session/process group (setsid) so the whole group can be
+            # signalled at shutdown; safer than preexec_fn=os.setsid.
+            start_new_session=True
+        )
+    print(" جاري البدء...", end="")
+    sys.stdout.flush()
+    # Poll until the server answers instead of sleeping a fixed amount of time.
+    server_ready = False
+    deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT
+    while time.monotonic() < deadline:
+        if ollama_serve_process.poll() is not None:
+            # The server process exited during startup; no point in waiting.
+            break
+        try:
+            check_process = subprocess.run(['ollama', 'list'], capture_output=True, text=True, timeout=20)
+            if check_process.returncode == 0:
+                server_ready = True
+                break
+        except subprocess.TimeoutExpired:
+            # The probe hung; treat as "not ready yet" and retry until the deadline.
+            pass
+        time.sleep(1)
+    if server_ready:
+        print(" يعمل بنجاح.")
+    else:
+        print(f"\n    - [❌] فشل تشغيل خادم Ollama بشكل صحيح.")
+        with open(OLLAMA_LOG_PATH, "r", encoding="utf-8", errors="replace") as log_file:
+            stderr_output = log_file.read()
+        print(f"      الخطأ من الخادم: {stderr_output.strip()}")
+        raise Exception("الخادم لم يبدأ بشكل صحيح.")
 except Exception as e:
+    print(f"\n[❌ خطأ فادح]: فشل تشغيل خادم Ollama. الخطأ: {e}")
+    # Do not leave a detached server behind when startup is abandoned.
+    if ollama_serve_process is not None and ollama_serve_process.poll() is None:
+        try:
+            os.killpg(os.getpgid(ollama_serve_process.pid), signal.SIGTERM)
+        except OSError:
+            # The process group is already gone; nothing left to stop.
+            pass
+    sys.exit(1)
+print("✅ [الخطوة 2/5]: تم تشغيل خادم Ollama بنجاح.\n")
+sys.stdout.flush()
+# -----------------------------------------------------------------------------
+# Part 3: start ngrok and create the public URL
+# -----------------------------------------------------------------------------
+print("✅ [الخطوة 3/5]: جاري تشغيل ngrok لتعريض Ollama API...")
+sys.stdout.flush()
+# Both names are read again later in this file: the URL is embedded in the
+# instructions text and both are used by the shutdown cleanup.
+ngrok_tunnel = None
+public_url_str = None
+try:
+    print("    - إنشاء نفق ngrok...", end="")
+    # HTTP tunnel to local port 11434, the port Ollama is configured to listen on.
+    # NOTE(review): nothing in this file puts authentication in front of the
+    # tunnelled API — confirm that exposing it publicly is intended.
+    ngrok_tunnel = ngrok.connect(11434, "http")
+    public_url_str = ngrok_tunnel.public_url
+    print(" تم.")
+    sys.stdout.flush()
+    print("\n🔗 الـ API متاح الآن على الرابط العام التالي:")
+    print(f"   {public_url_str}")
+    print("\n")
+    sys.stdout.flush()
+except Exception as e:
+    # A tunnel failure is fatal: the script exits with status 1.
+    print(f"\n[❌ خطأ فادح]: حدث خطأ أثناء تشغيل ngrok. الخطأ: {e}")
+    sys.exit(1)
+print("✅ [الخطوة 3/5]: تم تشغيل النفق بنجاح وتم عرض الرابط.\n")
+sys.stdout.flush()
+# -----------------------------------------------------------------------------
+# Part 4: check disk space and pull the models
+# -----------------------------------------------------------------------------
+print("✅ [الخطوة 4/5]: فحص مساحة التخزين وسحب النماذج المطلوبة...")
+sys.stdout.flush()
+print("    - فحص مساحة التخزين المتاحة...")
+# Informational only: the df output goes straight to this process's stdout and
+# its exit status is not checked.
+subprocess.run(['df', '-h', '/'])
+print("")
+sys.stdout.flush()
+# Model references passed verbatim to `ollama pull`.
+models_to_pull = [
+    "hf.co/Mungert/VibeThinker-1.5B-GGUF:BF16",
+    "hf.co/Mungert/Qwen3-30B-A1.5B-High-Speed-GGUF:IQ3_M",
+]
+# Outcome of each pull, used only for the summary printed at the end.
+successfully_pulled = []
+failed_to_pull = []
+print("    - ستبدأ عملية سحب النماذج بشكل تسلسلي (واحد تلو الآخر) لضمان الاستقرار.")
+print("    - ستظهر لك نسبة التقدم لكل نموذج بشكل مباشر. قد يستغرق هذا وقتًا طويلاً.\n")
+sys.stdout.flush()
+# Models are pulled one after another; a failed pull is recorded and the loop
+# continues with the next model instead of aborting the script.
+for model_name in models_to_pull:
+    print(f"--- [⏳] جاري سحب النموذج: {model_name} ---")
+    sys.stdout.flush()
     try:
+        # Output is not captured, so `ollama pull` writes its progress directly
+        # to this process's stdout/stderr. No timeout is set.
+        subprocess.run(
+            ['ollama', 'pull', model_name],
+            check=True
+        )
+        print(f"--- [✔️] تم سحب النموذج {model_name} بنجاح ---\n")
+        successfully_pulled.append(model_name)
+    except subprocess.CalledProcessError as e:
+        print(f"--- [❌] فشل سحب النموذج {model_name}. رمز الخروج: {e.returncode} ---\n")
+        failed_to_pull.append(model_name)
     except Exception as e:
+        print(f"--- [❌] حدث خطأ استثنائي عند سحب النموذج {model_name}. الخطأ: {e} ---\n")
+        failed_to_pull.append(model_name)
+    sys.stdout.flush()
+print("✅ [الخطوة 4/5]: انتهت عملية سحب النماذج!\n")
+# Summary of the pull results; each line is printed only if its list is non-empty.
+print("--- ملخص عملية السحب ---")
+if successfully_pulled:
+    print(f"✔️ نماذج تم سحبها بنجاح ({len(successfully_pulled)}): {', '.join(successfully_pulled)}")
+if failed_to_pull:
+    print(f"❌ نماذج فشل سحبها ({len(failed_to_pull)}): {', '.join(failed_to_pull)}")
+print("-------------------------\n")
+sys.stdout.flush()
+# -----------------------------------------------------------------------------
+# Part 5: build the Gradio interface
+# -----------------------------------------------------------------------------
+print("✅ [الخطوة 5/5]: إنشاء واجهة Gradio...")
+def get_models_list() -> str:
+    """Return the installed-model table printed by ``ollama list``.
+
+    Returns:
+        The command's stdout when it exits with status 0. If the command
+        cannot be started, times out (10 seconds) or exits with a non-zero
+        status, a human-readable error string starting with "❌" is returned
+        instead.
+
+    The function never raises: it is used directly as a Gradio click handler
+    and while building the instructions text, so errors are reported as text.
+    """
+    try:
+        result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, timeout=10)
+    except Exception as e:
+        return f"❌ خطأ في الحصول على قائمة النماذج: {e}"
+    if result.returncode != 0:
+        # A failed command usually leaves stdout empty; surface the reason
+        # instead of silently returning a blank list.
+        details = result.stderr.strip() or f"exit code {result.returncode}"
+        return f"❌ خطأ في الحصول على قائمة النماذج: {details}"
+    return result.stdout
+# Build the usage-instructions text.
+# This f-string is evaluated once, when the module is loaded, so the public
+# URL and the model list embedded in it reflect the state at startup; the
+# refresh button defined below updates only the separate textbox.
+instructions = f"""
+# ✨ خادم Ollama جاهز للاستخدام ✨
+## 🔗 رابط API العام:
+```
+{public_url_str}
+```
+## 📋 تعليمات الاستخدام في RikkaHub:
+1. انسخ الرابط أعلاه
+2. في RikkaHub، اذهب إلى إعدادات المزود (Provider Settings)
+3. اختر نوع المزود 'OpenAI-Compatible'
+4. في خانة 'Base URL'، الصق الرابط وأضف له /v1:
+   ```
+   {public_url_str}/v1
+   ```
+5. في خانة 'Model'، اكتب اسم النموذج الذي تريد استخدامه
+## 📊 قائمة النماذج المثبتة:
+```
+{get_models_list()}
+```
+## ⚠️ ملاحظات مهمة:
+- يجب إبقاء هذا Space قيد التشغيل للحفاظ على الاتصال
+- الرابط العام سيتغير إذا أعيد تشغيل Space
+- استخدم النماذج الخفيفة للحصول على أداء أفضل
 """
+# Build the Gradio interface: the instructions, a refreshable read-only model
+# list, and a static tips section.
+with gr.Blocks(title="Ollama Server on Hugging Face", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(instructions)
     with gr.Row():
+        refresh_btn = gr.Button("🔄 تحديث قائمة النماذج", size="sm")
+        # Initial value is fetched once here; later updates come from the button.
+        models_output = gr.Textbox(
+            label="النماذج المثبتة حاليًا",
+            value=get_models_list(),
+            lines=10,
+            interactive=False
+        )
+    # Clicking the button re-runs `ollama list` and writes the result to the textbox.
+    refresh_btn.click(fn=get_models_list, outputs=models_output)
     gr.Markdown("""
     ---
+    ### 💡 نصائح للاستخدام الأمثل:
+    - استخدم النماذج الصغيرة (1B-3B) للحصول على استجابة سريعة
+    - تأكد من استقرار الاتصال بالإنترنت
+    - راقب استخدام الذاكرة في Logs
     """)
+print("✅ تم إنشاء واجهة Gradio بنجاح!")
+print("⏳ الخادم يعمل الآن ومتاح للاستخدام...")
+sys.stdout.flush()
+# Launch Gradio. demo.launch() is called directly here (no separate thread is
+# created); the finally clause below stops Ollama and ngrok once launch()
+# returns or raises.
+# NOTE(review): this cleanup relies on an exception such as KeyboardInterrupt
+# unwinding the stack; a plain SIGTERM with Python's default handling would
+# presumably skip it — confirm how the hosting platform stops the process.
+if __name__ == "__main__":
+    try:
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            show_error=True
+        )
+    except KeyboardInterrupt:
+        print('\n\n⏳ تم طلب الإيقاف. جاري إغلاق العمليات...')
+    finally:
+        # Stop the Ollama server by signalling its whole process group.
+        if ollama_serve_process:
+            try:
+                pgid = os.getpgid(ollama_serve_process.pid)
+                os.killpg(pgid, signal.SIGTERM)
+                print("    - تم إيقاف خادم Ollama.")
+            except OSError:
+                # Raised when the process (group) no longer exists.
+                print("    - ⚠️ لم يتم العثور على عملية خادم Ollama.")
+        # Close the tunnel created earlier, identified by its public URL.
+        if ngrok_tunnel:
+            try:
+                ngrok.disconnect(public_url_str)
+                print("    - تم إيقاف نفق ngrok.")
+            except Exception as e:
+                print(f"    - ⚠️ فشل إيقاف ngrok: {e}")
+        # Always attempt to stop the ngrok process itself, even if no tunnel was recorded.
+        try:
+            ngrok.kill()
+            print("    - تم إيقاف ngrok daemon.")
+        except Exception as e:
+            print(f"    - ⚠️ فشل إيقاف ngrok daemon: {e}")
+        # Printed unconditionally, including when one of the steps above reported a failure.
+        print('✅ تم إيقاف جميع العمليات بنجاح.')