from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import gradio as gr
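# NOTE (assumption): on a Hugging Face Space these dependencies would normally
# be pinned in requirements.txt: llama-cpp-python, huggingface_hub, gradio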
# 1. We use the stable Qwen 2.5 model (from the public repo for now, just to
#    get the project running). This doesn't stop the bot from being "yours":
#    it still runs entirely on your own Space.
REPO_ID = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
FILENAME = "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
| print(f"🚀 Starting download for: {FILENAME} from {REPO_ID}") | |
| try: | |
| model_path = hf_hub_download( | |
| repo_id=REPO_ID, | |
| filename=FILENAME | |
| ) | |
| print(f"✅ Model downloaded to: {model_path}") | |
| except Exception as e: | |
| print(f"❌ Error downloading: {e}") | |
| raise e | |
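# hf_hub_download caches the file locally (under ~/.cache/huggingface by
# default), so a restart reuses the existing download instead of fetching again.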
| print("⚙️ Loading LLM...") | |
| # إعدادات مناسبة جداً لسرعة الرد | |
| llm = Llama( | |
| model_path=model_path, | |
| n_ctx=4096, | |
| n_threads=2, | |
| verbose=False | |
| ) | |
| print("✅ LLM Loaded!") | |
def generate_response(message, history):
    # Qwen 2.5's ChatML prompt format, for best accuracy
    # NOTE: history is ignored here, so the bot is stateless between turns
    prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<|im_end|>"],
        echo=False,
    )
    return output['choices'][0]['text']
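# Alternative sketch (not wired in): llama-cpp-python's create_chat_completion
# applies the chat template embedded in the GGUF, which also makes multi-turn
# memory straightforward. Assumes Gradio passes history as (user, assistant)
# tuples, its default format.
def generate_response_with_history(message, history):
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    result = llm.create_chat_completion(messages=messages, max_tokens=512)
    return result["choices"][0]["message"]["content"]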
# A simple interface for testing
demo = gr.ChatInterface(
    fn=generate_response,
    title="My Private AI 🧠",
    description="Qwen 2.5 - Running Securely on Docker",
)
# Run the server
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)