import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
import sys

# Verify that sentencepiece is installed
try:
    import sentencepiece
    print("✓ sentencepiece is installed")
except ImportError:
    print("✗ sentencepiece is NOT installed - attempting to install...")
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece", "protobuf"])
    import sentencepiece
    print("✓ sentencepiece installed successfully")

# Model and dataset identifiers
MODEL_NAME = "aab20abdullah/akin-yurt-finely"
DATASET_NAME = "aab20abdullah/turkmen-martyrs-dataset"

print("=" * 60)
print("Loading model and tokenizer...")
print("=" * 60)

# Try loading the tokenizer in several ways
tokenizer = None
tokenizer_loaded = False

# Attempt 1: default settings
try:
    print("Attempt 1: Loading with default settings...")
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True
    )
    tokenizer_loaded = True
    print("✓ Tokenizer loaded successfully with default settings")
except Exception as e:
    print(f"✗ Attempt 1 failed: {str(e)[:100]}")

# Attempt 2: fall back to the slow (sentencepiece-based) tokenizer
if not tokenizer_loaded:
    try:
        print("Attempt 2: Loading with use_fast=False...")
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            use_fast=False
        )
        tokenizer_loaded = True
        print("✓ Tokenizer loaded successfully with slow tokenizer")
    except Exception as e:
        print(f"✗ Attempt 2 failed: {str(e)[:100]}")

# Attempt 3: use LlamaTokenizer directly
if not tokenizer_loaded:
    try:
        print("Attempt 3: Trying LlamaTokenizer directly...")
        tokenizer = LlamaTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True
        )
        tokenizer_loaded = True
        print("✓ Tokenizer loaded successfully with LlamaTokenizer")
    except Exception as e:
        print(f"✗ Attempt 3 failed: {str(e)[:100]}")

# Attempt 4: use a tokenizer from a compatible base model as a fallback
if not tokenizer_loaded:
    try:
        print("Attempt 4: Using fallback tokenizer from compatible model...")
        fallback_models = [
            "mistralai/Mistral-7B-v0.1",
            "meta-llama/Llama-2-7b-hf",
            "facebook/opt-1.3b"
        ]
        for fallback_model in fallback_models:
            try:
                tokenizer = AutoTokenizer.from_pretrained(fallback_model)
                tokenizer_loaded = True
                print(f"✓ Using fallback tokenizer from {fallback_model}")
                break
            except Exception:
                continue
    except Exception as e:
        print(f"✗ Attempt 4 failed: {str(e)[:100]}")

if not tokenizer_loaded:
    raise RuntimeError(
        "Failed to load tokenizer! Please check:\n"
        "1. Model name is correct: aab20abdullah/akin-yurt-finely\n"
        "2. You have access to the model (if private)\n"
        "3. sentencepiece is properly installed\n"
        "4. Check the model card for special requirements"
    )
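# Optional sanity check (not in the original script): confirm that whichever
# tokenizer was loaded actually encodes text before continuing. A minimal
# sketch; the probe string is arbitrary.
try:
    probe = tokenizer("نص تجريبي", return_tensors="pt")
    print(f"✓ Tokenizer sanity check passed ({probe['input_ids'].shape[1]} tokens)")
except Exception as e:
    print(f"⚠ Tokenizer sanity check failed: {e}")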
# Set pad_token if the tokenizer does not define one
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print("✓ Set pad_token to eos_token")

print("\nLoading model...")
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )
    model.eval()
    print("✓ Model loaded successfully")
except Exception as e:
    print(f"✗ Model loading failed: {e}")
    raise

# Load the embedding model for RAG
print("\nLoading embedding model...")
try:
    embedding_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
    print("✓ Embedding model loaded successfully")
except Exception as e:
    print(f"✗ Embedding model loading failed: {e}")
    raise

# Load the dataset
print("\nLoading dataset...")
dataset = None
try:
    dataset = load_dataset(DATASET_NAME, split='train')
    print(f"✓ Loaded dataset with {len(dataset)} examples")
except Exception as e:
    print(f"⚠ Error loading with split='train': {e}")
    try:
        dataset = load_dataset(DATASET_NAME)
        if isinstance(dataset, dict):
            split_name = list(dataset.keys())[0]
            dataset = dataset[split_name]
            print(f"✓ Loaded dataset split '{split_name}' with {len(dataset)} examples")
    except Exception as e2:
        print(f"⚠ Error loading dataset: {e2}")
        print("Creating demo dataset for testing...")
        from datasets import Dataset
        dataset = Dataset.from_dict({
            "text": [
                "شهيد تركماني من العراق، استشهد في الدفاع عن أرضه.",
                "من شهداء تركمان تلعفر الذين ضحوا بأرواحهم.",
                "معلومات عن تاريخ وبطولات شهداء تركمان العراق.",
                "سيرة شهيد من أبناء الشعب التركماني في العراق.",
                "تضحيات شهداء تركمان في مواجهة الإرهاب والظلم."
            ]
        })
        print(f"✓ Created demo dataset with {len(dataset)} examples")

# Print basic dataset info
if len(dataset) > 0:
    print("\nDataset info:")
    print(f"  - Columns: {list(dataset[0].keys())}")
    print(f"  - First item sample: {str(dataset[0])[:150]}...")

# Build the RAG system
print("\n" + "=" * 60)
print("Building RAG index...")
print("=" * 60)


def extract_texts_from_dataset(dataset):
    """Extract texts from a dataset, supporting multiple record structures."""
    texts = []
    for idx, item in enumerate(dataset):
        try:
            text_parts = []
            # Try extracting text from each field
            for key, value in item.items():
                if value is None:
                    continue
                # Strings
                if isinstance(value, str) and len(value) > 5:
                    text_parts.append(f"{key}: {value}")
                # Lists
                elif isinstance(value, list):
                    list_str = ", ".join([str(v) for v in value if v])
                    if list_str:
                        text_parts.append(f"{key}: {list_str}")
                # Numbers and other scalar values
                elif isinstance(value, (int, float, bool)):
                    text_parts.append(f"{key}: {value}")
            if text_parts:
                text = " | ".join(text_parts)
                texts.append(text)
            elif 'text' in item and item['text']:
                texts.append(str(item['text']))
        except Exception as e:
            if idx < 5:  # only warn for the first few items
                print(f"⚠ Warning: Could not process item {idx}: {e}")
            continue

    # Fallback if no texts were extracted
    if not texts:
        print("⚠ Warning: No texts extracted, using raw dataset items")
        # Slicing a HF Dataset returns a dict of columns, so index rows explicitly
        texts = [str(dataset[i]) for i in range(min(100, len(dataset)))]

    return texts


texts = extract_texts_from_dataset(dataset)
print(f"✓ Extracted {len(texts)} text chunks from dataset")
if texts:
    print(f"  Sample text: {texts[0][:150]}...")
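# Optional refinement (not part of the original flow): long records can be
# split into overlapping windows before embedding, which often improves
# retrieval granularity. A minimal sketch with a hypothetical chunk_text
# helper; the window sizes are illustrative, and the helper is left unused
# so the original behavior is unchanged.
def chunk_text(text, max_chars=500, overlap=50):
    """Split a long string into overlapping character windows."""
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + max_chars])
        start += max_chars - overlap
    return chunks
# e.g. texts = [c for t in texts for c in chunk_text(t)]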
# Ensure we have texts to index
if len(texts) == 0:
    print("⚠ Error: No texts found! Creating demo texts...")
    texts = [
        "معلومات افتراضية عن شهداء تركمان العراق",
        "بيانات تجريبية لاختبار نظام RAG",
        "نص تجريبي للتأكد من عمل النظام"
    ]

# Create embeddings
print(f"\nCreating embeddings for {len(texts)} texts...")
try:
    embeddings = embedding_model.encode(texts, show_progress_bar=True, batch_size=32)
    embeddings = np.array(embeddings).astype('float32')
    print(f"✓ Created embeddings with shape {embeddings.shape}")
except Exception as e:
    print(f"✗ Error creating embeddings: {e}")
    raise

# Build the FAISS index
print("\nBuilding FAISS index...")
try:
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    print(f"✓ FAISS index built with {index.ntotal} vectors")
except Exception as e:
    print(f"✗ Error building FAISS index: {e}")
    raise

print("\n" + "=" * 60)
print("✓ RAG system ready!")
print("=" * 60 + "\n")


def retrieve_relevant_context(query, k=3):
    """Retrieve the texts most relevant to the query."""
    try:
        query_embedding = embedding_model.encode([query])
        query_embedding = np.array(query_embedding).astype('float32')
        distances, indices = index.search(query_embedding, k)
        relevant_texts = [texts[idx] for idx in indices[0] if idx < len(texts)]
        return "\n\n".join(relevant_texts)
    except Exception as e:
        print(f"Error in retrieve_relevant_context: {e}")
        return "خطأ في استرجاع المعلومات"


def generate_response(message, history, temperature=0.7, max_tokens=512, use_rag=True):
    """Generate a reply with the model, with or without RAG."""
    try:
        conversation = []

        # RAG context
        if use_rag:
            try:
                context = retrieve_relevant_context(message)
                system_message = f"""أنت مساعد ذكي. استخدم المعلومات التالية للإجابة على السؤال:

المعلومات المرجعية:
{context}

أجب بناءً على هذه المعلومات. إذا لم تكن المعلومات كافية، قل ذلك."""
                conversation.append({"role": "system", "content": system_message})
            except Exception as e:
                print(f"⚠ RAG retrieval failed: {e}")

        # Append the conversation history
        for user_msg, assistant_msg in history:
            conversation.append({"role": "user", "content": user_msg})
            if assistant_msg:
                conversation.append({"role": "assistant", "content": assistant_msg})

        # Append the current message
        conversation.append({"role": "user", "content": message})

        # Convert to a prompt
        try:
            prompt = tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=True
            )
        except Exception as e:
            print(f"⚠ Chat template failed, using simple format: {e}")
            prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation])
            prompt += "\nassistant: "

        # Tokenize
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        )
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=temperature > 0,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1
            )

        # Decode only the newly generated tokens
        response = tokenizer.decode(
            outputs[0][inputs['input_ids'].shape[1]:],
            skip_special_tokens=True
        )
        return response.strip()
    except Exception as e:
        error_msg = f"عذراً، حدث خطأ: {str(e)}"
        print(f"✗ Error in generate_response: {e}")
        import traceback
        traceback.print_exc()
        return error_msg
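# Illustrative usage outside the UI (an assumption, not part of the original
# flow); history uses Gradio's list-of-[user, assistant] pairs. Kept as a
# comment so no inference runs at startup:
#
#     print(generate_response("من هم شهداء تركمان تلعفر؟", history=[], use_rag=True))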
""") with gr.Row(): with gr.Column(scale=2): chatbot = gr.Chatbot( height=500, label="المحادثة", show_label=True, avatar_images=(None, "🤖") ) with gr.Row(): msg = gr.Textbox( label="رسالتك", placeholder="اكتب سؤالك هنا...", show_label=False, scale=4 ) submit = gr.Button("إرسال", variant="primary", scale=1) clear = gr.Button("مسح المحادثة") with gr.Column(scale=1): gr.Markdown("### ⚙️ الإعدادات") use_rag = gr.Checkbox( label="استخدام RAG", value=True, info="تفعيل استرجاع المعلومات من قاعدة البيانات" ) temperature = gr.Slider( minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature", info="يتحكم في عشوائية الإجابات" ) max_tokens = gr.Slider( minimum=128, maximum=2048, value=512, step=128, label="Max Tokens", info="الحد الأقصى لطول الإجابة" ) gr.Markdown(f""" ### 📊 معلومات النظام - **النموذج**: {MODEL_NAME} - **البيانات**: {DATASET_NAME} - **عدد السجلات**: {len(texts)} - **Tokenizer**: {'✓ Loaded' if tokenizer_loaded else '✗ Failed'} - **Device**: {'GPU' if torch.cuda.is_available() else 'CPU'} ### 💡 نصائح - جرّب تشغيل/إيقاف RAG لرؤية الفرق - Temperature منخفض = إجابات دقيقة - Temperature عالي = إجابات إبداعية """) def user_message(message, history): return "", history + [[message, None]] def bot_response(history, temperature, max_tokens, use_rag): message = history[-1][0] response = generate_response( message, history[:-1], temperature=temperature, max_tokens=max_tokens, use_rag=use_rag ) history[-1][1] = response return history # Event handlers msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then( bot_response, [chatbot, temperature, max_tokens, use_rag], chatbot ) submit.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then( bot_response, [chatbot, temperature, max_tokens, use_rag], chatbot ) clear.click(lambda: None, None, chatbot, queue=False) if __name__ == "__main__": demo.launch()