Spaces:

YussefGAFeer
/

vibethinker-api

Sleeping

App Files Files Community

vibethinker-api / app.py

YussefGAFeer

Update app.py

c2c7342 verified 9 days ago

raw

history blame contribute delete

16 kB

	# =============================================================================
	# كود Ollama لـ Hugging Face Space مع Ngrok (نسخة محسّنة ضد Timeout)
	# =============================================================================

	import sys
	import subprocess
	import time
	import os
	import signal
	import gradio as gr
	import socket

	# -----------------------------------------------------------------------------
	# Part 1: install the required libraries and Ollama
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 1/6]: تثبيت المكتبات الضرورية و Ollama...", flush=True)

	# Install pyngrok with pip, retrying when pip fails or exceeds its 120 s limit.
	print(" - تثبيت pyngrok...", end="", flush=True)
	max_retries = 3
	_pip_install_pyngrok = [sys.executable, '-m', 'pip', 'install', 'pyngrok', '-q', '--no-cache-dir']
	for attempt in range(max_retries):
	    try:
	        subprocess.run(_pip_install_pyngrok, check=True, timeout=120)
	    except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
	        if attempt >= max_retries - 1:
	            # No attempts left: the script cannot continue without pyngrok.
	            print(f"\n[❌ خطأ فادح]: فشل تثبيت pyngrok بعد {max_retries} محاولات.")
	            sys.exit(1)
	        print(f" فشل (محاولة {attempt + 1}/{max_retries})، إعادة المحاولة...")
	        time.sleep(5)
	    else:
	        print(" تم.", flush=True)
	        break

	# Install Ollama with the official install script.
	print(" - تثبيت Ollama بالطريقة الرسمية...")
	sys.stdout.flush()
	try:
	    # The pipe has to be a bare "|". The previous "\|" was an invalid Python
	    # escape that reached the shell as an escaped, literal "|" argument, so the
	    # script was handed to curl as extra URLs instead of being piped into sh.
	    # NOTE: the pipeline's exit status is that of `sh`, so a failed download
	    # may not be reported as an error by check=True.
	    install_command = "curl -fsSL https://ollama.com/install.sh | sh"
	    result = subprocess.run(
	        install_command,
	        shell=True,
	        check=True,
	        capture_output=True,
	        text=True,
	        timeout=300,
	    )
	    print(" - تم تثبيت Ollama بنجاح.")
	    sys.stdout.flush()
	except subprocess.TimeoutExpired:
	    print("\n[❌ خطأ فادح]: انتهت مهلة تثبيت Ollama (300 ثانية).")
	    sys.exit(1)
	except subprocess.CalledProcessError as e:
	    # Show the installer's captured stderr so the failure can be diagnosed.
	    print(f"\n[❌ خطأ فادح]: فشل تثبيت Ollama. رمز الخروج: {e.returncode}")
	    print(e.stderr)
	    sys.exit(1)

	print("✅ [الخطوة 1/6]: تم تثبيت المكتبات و Ollama بنجاح!\n")
	sys.stdout.flush()

	# -----------------------------------------------------------------------------
	# Part 2: start the Ollama server first
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 2/6]: تشغيل خادم Ollama...")
	sys.stdout.flush()

	# Make the server listen on every interface, port 11434.
	os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'

	ollama_serve_process = None
	try:
	    import tempfile  # local import: only needed to locate the server log file

	    print(" - تشغيل خادم Ollama في الخلفية...", end="")
	    # Send the server's output to a log file rather than to pipes: nothing in
	    # this script drains the pipes, so a long-running server would block as
	    # soon as the OS pipe buffer filled up.
	    ollama_log_path = os.path.join(tempfile.gettempdir(), "ollama_serve.log")
	    with open(ollama_log_path, "wb") as ollama_log:
	        ollama_serve_process = subprocess.Popen(
	            ['ollama', 'serve'],
	            stdout=ollama_log,
	            stderr=subprocess.STDOUT,
	            # Own session => own process group, which the shutdown code
	            # signals with os.killpg(). Replaces preexec_fn=os.setsid.
	            start_new_session=True,
	        )
	    print(" جاري البدء...", end="")
	    sys.stdout.flush()
	    # Give the server time to come up before probing it.
	    time.sleep(15)

	    # `ollama list` talks to the server, so a zero exit code means it is up.
	    check_process = subprocess.run(
	        ['ollama', 'list'],
	        capture_output=True,
	        text=True,
	        timeout=30,
	    )
	    if check_process.returncode == 0:
	        print(" يعمل بنجاح.")
	    else:
	        print("\n - [❌] فشل تشغيل خادم Ollama بشكل صحيح.")
	        # Read the log file instead of a pipe: reading a pipe of a process
	        # that is still running would block until it exits.
	        with open(ollama_log_path, "r", encoding="utf-8", errors="replace") as ollama_log:
	            stderr_output = ollama_log.read()
	        if stderr_output.strip():
	            print(f" الخطأ من الخادم: {stderr_output.strip()}")
	        raise Exception("الخادم لم يبدأ بشكل صحيح.")
	except Exception as e:
	    # Any failure here (missing binary, probe timeout, bad exit code) is fatal.
	    print(f"\n[❌ خطأ فادح]: فشل تشغيل خادم Ollama. الخطأ: {e}")
	    sys.exit(1)

	print("✅ [الخطوة 2/6]: تم تشغيل خادم Ollama بنجاح.\n")
	sys.stdout.flush()

	# -----------------------------------------------------------------------------
	# Part 3: set up Ngrok with improved timeout handling
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 3/6]: إعداد Ngrok مع حماية من Timeout...", flush=True)

	NGROK_AUTH_TOKEN = os.getenv("NGROK_AUTH_TOKEN", "")

	# Tunnel state read by the UI and by the shutdown code.
	ngrok_tunnel = None
	public_url_str = None
	ngrok_setup_success = False

	if NGROK_AUTH_TOKEN:
	    try:
	        print(" - استيراد pyngrok...", end="")
	        import pyngrok
	        from pyngrok import ngrok, conf
	        print(" تم.")

	        # Raise the ngrok timeouts.
	        print(" - تكوين إعدادات ngrok...", end="")
	        pyngrok_config = conf.get_default()
	        pyngrok_config.start_timeout = 180  # 3 minutes
	        pyngrok_config.request_timeout = 30
	        print(" تم.")

	        print(" - تعيين Auth Token...", end="")
	        ngrok.set_auth_token(NGROK_AUTH_TOKEN)
	        print(" تم.")

	        print(" - إنشاء نفق ngrok (قد يستغرق دقائق)...", end="", flush=True)

	        # Up to three connection attempts, 10 s apart; the last failure is
	        # re-raised to the handlers below.
	        for attempt in range(3):
	            try:
	                ngrok_tunnel = ngrok.connect(11434, "http", bind_tls=True)
	                public_url_str = ngrok_tunnel.public_url
	                ngrok_setup_success = True
	                print(" تم بنجاح!")
	                break
	            except Exception:
	                if attempt >= 2:
	                    raise
	                print(f"\n محاولة {attempt + 1} فشلت، إعادة المحاولة بعد 10 ثواني...")
	                time.sleep(10)

	        if ngrok_setup_success:
	            print(f"\n🔗 الـ API متاح الآن على الرابط العام التالي:")
	            print(f" {public_url_str}\n", flush=True)

	    except ImportError as e:
	        print(f"\n⚠️ [تحذير]: فشل استيراد pyngrok: {e}")
	        print(" سيتم تشغيل الخادم محلياً فقط.\n")
	    except Exception as e:
	        error_msg = str(e)
	        _lowered_msg = error_msg.lower()
	        if any(_marker in _lowered_msg for _marker in ("timeout", "timed out")):
	            # Timeout: print likely causes and suggested remedies.
	            print(f"\n⚠️ [تحذير]: انتهت مهلة الاتصال بـ ngrok: {error_msg}")
	            for _hint in (
	                " الأسباب المحتملة:",
	                " 1. اتصال الإنترنت بطيء في Hugging Face Space",
	                " 2. خوادم ngrok مشغولة",
	                " 3. جدار حماية يمنع الاتصال",
	                "\n الحلول:",
	                " ✓ أعد تشغيل Space",
	                " ✓ استخدم Cloudflare Tunnel بدلاً من ngrok",
	                " ✓ استخدم الخادم محلياً داخل Hugging Face\n",
	            ):
	                print(_hint)
	        else:
	            print(f"\n⚠️ [تحذير]: فشل إعداد Ngrok: {error_msg}")
	            print(" سيتم تشغيل الخادم محلياً فقط.\n")
	else:
	    # No token configured: skip ngrok entirely.
	    print("⚠️ [تحذير]: لم يتم العثور على NGROK_AUTH_TOKEN في Secrets")
	    print(" سيتم تخطي إعداد Ngrok. يمكنك استخدام الخادم محلياً فقط.")
	    print(" لإضافة التوكن: Settings → Repository secrets → NGROK_AUTH_TOKEN\n")

	print(
	    "✅ [الخطوة 3/6]: تم تشغيل النفق بنجاح!\n"
	    if ngrok_setup_success
	    else "⚠️ [الخطوة 3/6]: تم تخطي Ngrok، الخادم يعمل محلياً فقط.\n",
	    flush=True,
	)

	# -----------------------------------------------------------------------------
	# Part 4: check the available storage space
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 4/6]: فحص مساحة التخزين...", flush=True)
	# `df` writes straight to the inherited stdout; the flush above keeps the
	# banner ahead of its output.
	_df_command = ['df', '-h', '/']
	subprocess.run(_df_command)
	print(flush=True)

	# -----------------------------------------------------------------------------
	# Part 5: pull the models
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 5/6]: سحب النماذج المطلوبة...", flush=True)

	models_to_pull = [
	    # "hf.co/unsloth/Olmo-3-32B-Think-GGUF:Q4_0",
	    # "hf.co/unsloth/Qwen3-VL-30B-A3B-Thinking-1M-GGUF:Q4_0",
	    "seamon67/Ministral-3-Reasoning:14b"
	]

	successfully_pulled = []
	failed_to_pull = []

	print(" - بدء سحب النماذج بشكل تسلسلي...\n", flush=True)

	# Pull the models one at a time; a failure is recorded and the loop moves on.
	for model_name in models_to_pull:
	    print(f"--- [⏳] جاري سحب النموذج: {model_name} ---", flush=True)
	    try:
	        subprocess.run(
	            ['ollama', 'pull', model_name],
	            check=True,
	            timeout=1800,  # 30 minutes per model
	        )
	    except subprocess.TimeoutExpired:
	        _pull_failure = f"--- [❌] انتهت مهلة سحب النموذج {model_name} ---\n"
	    except subprocess.CalledProcessError as e:
	        _pull_failure = f"--- [❌] فشل سحب النموذج {model_name}. رمز الخروج: {e.returncode} ---\n"
	    except Exception as e:
	        _pull_failure = f"--- [❌] خطأ استثنائي: {e} ---\n"
	    else:
	        _pull_failure = None

	    if _pull_failure is None:
	        print(f"--- [✔️] تم سحب النموذج {model_name} بنجاح ---\n")
	        successfully_pulled.append(model_name)
	    else:
	        print(_pull_failure)
	        failed_to_pull.append(model_name)
	    sys.stdout.flush()

	print("✅ [الخطوة 5/6]: انتهت عملية سحب النماذج!\n")

	# Summary of the pull results.
	print("--- ملخص عملية السحب ---")
	if successfully_pulled:
	    print(f"✔️ نماذج تم سحبها بنجاح ({len(successfully_pulled)}): {', '.join(successfully_pulled)}")
	if failed_to_pull:
	    print(f"❌ نماذج فشل سحبها ({len(failed_to_pull)}): {', '.join(failed_to_pull)}")
	print("-------------------------\n", flush=True)

	# -----------------------------------------------------------------------------
	# Part 6: build the Gradio interface
	# -----------------------------------------------------------------------------
	print("✅ [الخطوة 6/6]: إنشاء واجهة Gradio...")

	def get_models_list():
	    """Return the installed-models listing produced by ``ollama list``.

	    Returns:
	        The command's stdout when it exits with code 0; otherwise a short
	        error message. Any exception raised while running the command
	        (missing binary, 10 s timeout, ...) is reported as an error message
	        instead of being propagated.
	    """
	    try:
	        listing = subprocess.run(
	            ['ollama', 'list'],
	            capture_output=True,
	            text=True,
	            timeout=10,
	        )
	    except Exception as err:
	        return f"❌ خطأ: {err}"

	    if listing.returncode != 0:
	        return "❌ خطأ في الحصول على القائمة"
	    return listing.stdout

	def get_server_status():
	    """Build a multi-line status report for the Ollama server and the tunnel.

	    The report starts with a fixed header line, followed by one line for
	    Ollama (based on the exit code of ``ollama list``, limited to 5 s) and
	    one or two lines for Ngrok (based on the module-level
	    ``ngrok_setup_success`` and ``public_url_str``).

	    Returns:
	        The report as a single string; every line ends with a newline.
	    """
	    status = "🟢 الخادم يعمل\n\n"

	    # Ollama status
	    try:
	        result = subprocess.run(['ollama', 'list'], capture_output=True, timeout=5)
	        if result.returncode == 0:
	            status += "✅ Ollama: يعمل\n"
	        else:
	            status += "❌ Ollama: لا يعمل\n"
	    except Exception:
	        # Missing binary, timeout, etc. Deliberately not a bare `except:`,
	        # which would also swallow KeyboardInterrupt and SystemExit.
	        status += "❌ Ollama: خطأ في الفحص\n"

	    # Ngrok status
	    if ngrok_setup_success and public_url_str:
	        status += "✅ Ngrok: متصل\n"
	        status += f"🔗 الرابط العام: {public_url_str}\n"
	    else:
	        status += "⚠️ Ngrok: غير متصل (محلي فقط)\n"

	    return status

	# Build the text shown in the interface: use the public tunnel URL when the
	# tunnel is up, otherwise the local Ollama address.
	if ngrok_setup_success and public_url_str:
	    api_url, connection_status = public_url_str, "🟢 متصل بالإنترنت عبر Ngrok"
	else:
	    api_url, connection_status = (
	        "http://localhost:11434",
	        "🟡 يعمل محلياً فقط (داخل Hugging Face Space)",
	    )
	# The OpenAI-compatible endpoint is the base URL plus "/v1" in both cases.
	api_url_v1 = f"{api_url}/v1"

	instructions = f"""
	# ✨ خادم Ollama على Hugging Face Space ✨

	## 📊 حالة الاتصال
	{connection_status}

	## 🔗 روابط API:

	### الرابط الأساسي:
	```
	{api_url}
	```

	### رابط OpenAI Compatible:
	```
	{api_url_v1}
	```

	## 📋 تعليمات الاستخدام:

	### في RikkaHub (إذا كان Ngrok يعمل):
	1. انسخ الرابط: `{api_url_v1}`
	2. اذهب إلى Provider Settings
	3. اختر: OpenAI-Compatible
	4. Base URL: الصق الرابط
	5. Model: اسم النموذج (مثال: `hf.co/Mungert/VibeThinker-1.5B-GGUF:BF16`)

	### اختبار باستخدام curl:
	```bash
	curl {api_url}/api/tags
	```

	## ⚠️ ملاحظات:
	- إذا فشل Ngrok، يمكنك استخدام الخادم محلياً داخل HF فقط
	- أعد تشغيل Space إذا واجهت مشاكل
	- استخدم نماذج صغيرة للأداء الأفضل
	"""

	# Build the Gradio interface.
	# The stylesheet and the static markdown/HTML snippets are named up front so
	# the layout below reads as structure only.
	_custom_css = """
	.status-box { padding: 15px; border-radius: 8px; background: #f0f0f0; }
	.warning-box { padding: 15px; border-radius: 8px; background: #fff3cd; border-left: 4px solid #ffc107; }
	.success-box { padding: 15px; border-radius: 8px; background: #d4edda; border-left: 4px solid #28a745; }
	"""

	_ngrok_warning_html = """
	<div class="warning-box">
	<h3>⚠️ تنبيه: Ngrok غير متصل</h3>
	<p>فشل الاتصال بـ Ngrok بسبب timeout. الخادم يعمل محلياً فقط.</p>
	<p><strong>الحلول الممكنة:</strong></p>
	<ul>
	<li>أعد تشغيل Space (قد ينجح في المحاولة الثانية)</li>
	<li>تحقق من إضافة NGROK_AUTH_TOKEN في Secrets بشكل صحيح</li>
	<li>استخدم بديل مثل Cloudflare Tunnel</li>
	</ul>
	</div>
	"""

	_tips_markdown = """
	---
	### 💡 نصائح:
	- النماذج الصغيرة (1B-3B) أسرع على CPU
	- راقب استخدام الذاكرة في Logs
	- أعد تشغيل Space إذا حدثت مشاكل
	"""

	with gr.Blocks(title="Ollama Server", theme=gr.themes.Soft(), css=_custom_css) as demo:

	    gr.Markdown(instructions)

	    # Two side-by-side columns: server status and installed models, each with
	    # a refresh button and a read-only textbox filled once at build time.
	    with gr.Row():
	        with gr.Column(scale=1):
	            status_btn = gr.Button("🔄 تحديث حالة الخادم", size="sm")
	            status_output = gr.Textbox(
	                label="حالة الخادم",
	                value=get_server_status(),
	                lines=6,
	                interactive=False,
	            )

	        with gr.Column(scale=1):
	            models_btn = gr.Button("🔄 تحديث قائمة النماذج", size="sm")
	            models_output = gr.Textbox(
	                label="النماذج المثبتة",
	                value=get_models_list(),
	                lines=6,
	                interactive=False,
	            )

	    # Each button re-runs its query and overwrites its textbox.
	    status_btn.click(fn=get_server_status, outputs=status_output)
	    models_btn.click(fn=get_models_list, outputs=models_output)

	    # The warning box is only rendered when no tunnel was established.
	    if not ngrok_setup_success:
	        gr.Markdown(_ngrok_warning_html)

	    gr.Markdown(_tips_markdown)

	print("✅ تم إنشاء واجهة Gradio بنجاح!")
	print("⏳ الخادم جاهز للاستخدام...\n", flush=True)

	# Launch Gradio and, on the way out, stop the background processes.
	if __name__ == "__main__":
	    try:
	        demo.launch(
	            server_name="0.0.0.0",
	            server_port=7860,
	            share=False,
	            show_error=True,
	        )
	    except KeyboardInterrupt:
	        print('\n⏳ إيقاف...')
	    finally:
	        # Clean up the child processes. Cleanup stays best-effort, but
	        # failures are reported instead of being silently swallowed by a
	        # bare `except:` (which would also hide KeyboardInterrupt).
	        if ollama_serve_process:
	            try:
	                # Signal the server's whole process group.
	                pgid = os.getpgid(ollama_serve_process.pid)
	                os.killpg(pgid, signal.SIGTERM)
	                try:
	                    # Reap the child so it does not linger as a zombie.
	                    ollama_serve_process.wait(timeout=10)
	                except subprocess.TimeoutExpired:
	                    # Still alive after SIGTERM: force it down.
	                    os.killpg(pgid, signal.SIGKILL)
	                    ollama_serve_process.wait(timeout=5)
	                print("✓ تم إيقاف Ollama")
	            except ProcessLookupError:
	                # The server had already exited; nothing left to stop.
	                pass
	            except (OSError, subprocess.TimeoutExpired) as cleanup_error:
	                print(f"⚠️ تعذر إيقاف Ollama: {cleanup_error}")

	        if ngrok_tunnel:
	            # `ngrok` is bound whenever a tunnel object exists, because the
	            # tunnel can only have been created after the pyngrok import.
	            try:
	                ngrok.disconnect(public_url_str)
	                ngrok.kill()
	                print("✓ تم إيقاف Ngrok")
	            except Exception as cleanup_error:
	                print(f"⚠️ تعذر إيقاف Ngrok: {cleanup_error}")

	        print('✅ تم الإيقاف بنجاح')