import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# =========================
# CONFIG
# =========================
MODEL_NAME = "kawkabelaloom/astramindx"
# System prompt (Arabic): "You are a smart Arabic assistant; you answer clearly, in a simple and helpful style."
SYSTEM_PROMPT = "أنت مساعد عربي ذكي، تجيب بوضوح وبأسلوب بسيط ومفيد."

# =========================
# LOAD TOKENIZER
# =========================
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True
)

# =========================
# LOAD MODEL (FORCE CPU – NO QUANTIZATION)
# =========================
print("🔄 Loading model (CPU, no quantization)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=None,            # no automatic device_map; keep everything on CPU
    torch_dtype=torch.float32,  # full precision is safe on CPU
    low_cpu_mem_usage=False,    # important: load weights eagerly
    trust_remote_code=True,
    quantization_config=None    # 🔥 the key fix: disable any quantization config
)
model.eval()
print("✅ Model loaded successfully")

# =========================
# PIPELINE
# =========================
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

# =========================
# CHAT LOGIC
# =========================
def build_prompt(history, user_message):
    # Turn markers (Arabic): "المستخدم:" = "User:", "المساعد:" = "Assistant:"
    prompt = SYSTEM_PROMPT + "\n\n"
    for user, bot in history:
        prompt += f"المستخدم: {user}\nالمساعد: {bot}\n"
    prompt += f"المستخدم: {user_message}\nالمساعد:"
    return prompt

def chat(user_message, history):
    try:
        prompt = build_prompt(history, user_message)
        output = generator(
            prompt,
            max_new_tokens=128,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        # The pipeline returns the prompt plus the continuation: keep only the
        # text after the last assistant marker, and cut the reply off if the
        # model starts generating the next user turn on its own.
        response = output[0]["generated_text"].split("المساعد:")[-1]
        response = response.split("المستخدم:")[0].strip()
        history.append((user_message, response))
        return history, ""
    except Exception as e:
        history.append((user_message, f"❌ Error: {str(e)}"))
        return history, ""

# =========================
# GRADIO UI
# =========================
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Astramindx Chatbot")
    chatbot = gr.Chatbot(height=450)
    # Placeholder (Arabic): "Type your question here..."
    msg = gr.Textbox(placeholder="اكتب سؤالك هنا...")
    msg.submit(chat, [msg, chatbot], [chatbot, msg])

demo.launch()
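
# ---------------------------------------------------------
# Usage note (assumptions: this file is the app entry point,
# e.g. app.py for a Hugging Face Space or a local run; the
# source pins no package versions, so plain installs of the
# three imported libraries are assumed):
#
#   pip install gradio torch transformers
#   python app.py
#
# The model is loaded in float32 on CPU, so the first launch
# can take several minutes and the machine needs enough RAM
# to hold the full-precision weights.
# ---------------------------------------------------------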