Spaces:

anaspro
/

chatbox

Runtime error

App Files Community

anaspro committed on Oct 26

Commit

51d3416

1 Parent(s): 24d5388

updatE

Browse files

Files changed (4) hide show

README.md +6 -6
app.py +40 -73
test_iraqi_model.py +2 -2
test_jais.py +54 -0

README.md CHANGED Viewed

@@ -8,7 +8,7 @@ sdk_version: 5.42.0
 app_file: app.py
 pinned: false
 models:
-- anaspro/iraqi-7b
 tags:
 - customer-support
 - arabic
@@ -18,13 +18,13 @@ tags:
 - multilingual
 ---
-ذكاء صناعي يتحدث باللهجة العراقية ويجيب باحترافية وذكاء.
 🚀 **المميزات:**
-- 🇮🇶 لهجة عراقية أصيلة وطبيعية
-- 🧠 إجابات ذكية واحترافية
-- 💬 محادثات متنوعة بالعراقي
-- 🎯 مدعوم بـ موديل ذكي مع تحسينات الأداء
 📞 احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.

 app_file: app.py
 pinned: false
 models:
+- inceptionai/jais-family-13b-chat
 tags:
 - customer-support
 - arabic
 - multilingual
 ---
+ذكاء صناعي متقدم يدعم اللغتين العربية والإنجليزية - Jais AI.
 🚀 **المميزات:**
+- 🌐 دعم ثنائي اللغة (عربي وإنجليزي)
+- 🧠 موديل Jais المتقدم من Inception
+- 💬 إجابات ذكية واحترافية
+- 🎯 مدعوم بـ موديل 13B مع تحسينات الأداء
 📞 احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.

app.py CHANGED Viewed

@@ -8,9 +8,9 @@ import spaces
 model_path = "inceptionai/jais-family-13b-chat"
-# Iraqi Arabic chat prompt
-prompt_ar = "### Instruction:إنت ذكاء صناعي اسمه \"أليكس\" تشتغل كمساعد دعم بشركة TechSolutions. تجاوب حصراً باللهجة العراقية، بدون فصحى نهائياً ولا إنكليزي إلا إذا الزبون استعمله. ردودك ودّية، مختصرة، واضحة، وتراعي شعور الزبون. ### Input:[|Human|] {Question} [|AI|] ### Response :"
 # إذا كان فيه HF_TOKEN في البيئة
 hf_token = os.getenv("HF_TOKEN")
@@ -39,36 +39,24 @@ if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 def get_response(text, tokenizer=tokenizer, model=model):
-    """نفس الدالة من documentation مع تعديل لـ chat model"""
-    tokenized = tokenizer(text, return_tensors="pt")
-    input_ids, attention_mask = tokenized['input_ids'].to(device), tokenized['attention_mask'].to(device)
-    input_len = input_ids.shape[-1]
     generate_ids = model.generate(
-        input_ids,
-        attention_mask=attention_mask,
-        past_key_values=None,  # إضافة past_key_values صراحة لتجنب الأخطاء
-        top_p=0.8,
-        temperature=0.2,
-        max_length=input_len + 256,  # Limit response length to prevent multiple responses
         min_length=input_len + 4,
-        repetition_penalty=1.3,
         do_sample=True,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token_id=tokenizer.eos_token_id  # Stop at end of sentence
     )
     response = tokenizer.batch_decode(
         generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
     )[0]
-    response = response.split("### Response :")[-1].lstrip()
-    # Extract only the first AI response to prevent multiple responses
-    if "[|AI|]" in response and "[|Human|]" in response:
-        # If there are multiple turns, take only the first AI response
-        response = response.split("[|Human|]")[0].strip()
-    elif "[|AI|]" in response:
-        # Remove the [|AI|] marker from the beginning
-        response = response.replace("[|AI|]", "").strip()
     return response
 def format_conversation_history(chat_history):
@@ -94,49 +82,28 @@ def detect_language(text):
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
-    # Build conversation for Iraqi model format
-    conversation_parts = []
-    # Add chat history
-    if chat_history:
-        for item in chat_history:
-            role = item["role"]
-            content = item["content"]
-            if isinstance(content, list):
-                content = content[0]["text"] if content and "text" in content[0] else str(content)
-            if role == "user":
-                conversation_parts.append(f"[|Human|] {content}")
-            elif role == "assistant":
-                conversation_parts.append(f"[|AI|] {content}")
-    # Add current user message
-    conversation_parts.append(f"[|Human|] {input_data}")
-    # Join conversation
-    conversation = "\n".join(conversation_parts)
-    # Create full prompt using the Iraqi Arabic prompt template
-    full_prompt = prompt_ar.format(Question=conversation)
     try:
-        # استخدام دالة get_response من documentation
-        response = get_response(full_prompt)
-        # استخراج الرد الجديد فقط (بعد "### Response :")
-        if "### Response :" in response:
-            response = response.split("### Response :")[-1].strip()
-        if not response:
-            response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
         yield response
     except Exception as e:
         print(f"Error in generate_response: {e}")
         import traceback
         print(traceback.format_exc())
-        yield "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
 demo = gr.ChatInterface(
     fn=generate_response,
@@ -148,24 +115,24 @@ demo = gr.ChatInterface(
         gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
-        [{"text": "شرح لي كيف يشتغل الذكاء الاصطناعي"}],
-        [{"text": "قولي قصة قصيرة بالعراقي"}],
-        [{"text": "شنو رأيك بالوضع الاقتصادي الحالي؟"}],
-        [{"text": "ساعدني أفهم البرمجة"}],
-        [{"text": "أعطيني نصيحة للحياة اليومية"}],
     ],
     cache_examples=False,
     type="messages",
-    title="ذكاء عراقي - Iraqi AI Assistant",
-    description="""🤖 ذكاء صناعي يتحدث باللهجة العراقية
 ✨ المميزات:
-- 🇮🇶 لهجة عراقية أصيلة وطبيعية
-- 🧠 إجابات ذكية واحترافية
-- 💬 محادثات متنوعة بالعراقي
-- 🎯 مدعوم بـ موديل ذكي مع تحسينات الأداء
-احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.""",
     fill_height=True,
     textbox=gr.Textbox(
         label="اكتب رسالتك هنا",

 model_path = "inceptionai/jais-family-13b-chat"
+# Jais original prompts (مثل الكود الأصلي)
+prompt_eng = "### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n[|AI|]\n### Response :"
+prompt_ar = "### Instruction:اسمك \"جيس\" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :\n### Input:[|Human|] {Question}\n[|AI|]\n### Response :"
 # إذا كان فيه HF_TOKEN في البيئة
 hf_token = os.getenv("HF_TOKEN")
     tokenizer.pad_token = tokenizer.eos_token
 def get_response(text, tokenizer=tokenizer, model=model):
+    """نفس الدالة من الكود الأصلي مع تحسينات للأداء"""
+    input_ids = tokenizer(text, return_tensors="pt").input_ids
+    inputs = input_ids.to(device)
+    input_len = inputs.shape[-1]
     generate_ids = model.generate(
+        inputs,
+        top_p=0.9,
+        temperature=0.3,
+        max_length=2048,
         min_length=input_len + 4,
+        repetition_penalty=1.2,
         do_sample=True,
+        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
     )
     response = tokenizer.batch_decode(
         generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
     )[0]
+    response = response.split("### Response :")[-1]
     return response
 def format_conversation_history(chat_history):
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
+    # Detect language of the current question (مثل الكود الأصلي)
+    def detect_language(text):
+        arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
+        total_chars = len(text.replace(' ', ''))
+        if total_chars == 0:
+            return 'ar'
+        arabic_ratio = arabic_chars / total_chars
+        return 'ar' if arabic_ratio > 0.3 else 'en'
+    lang = detect_language(input_data)
+    ques = input_data
+    text = prompt_ar.format_map({'Question': ques}) if lang == 'ar' else prompt_eng.format_map({'Question': ques})
     try:
+        response = get_response(text)
         yield response
     except Exception as e:
         print(f"Error in generate_response: {e}")
         import traceback
         print(traceback.format_exc())
+        yield "أعتذر، حدث خطأ. يرجى المحاولة مرة أخرى."
 demo = gr.ChatInterface(
     fn=generate_response,
         gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
     ],
     examples=[
+        [{"text": "ما هي عاصمة الامارات؟"}],
+        [{"text": "شرح لي الذكاء الاصطناعي"}],
+        [{"text": "أخبرني قصة قصيرة"}],
+        [{"text": "كيف أتعلم البرمجة؟"}],
+        [{"text": "What is the capital of UAE?"}],
     ],
     cache_examples=False,
     type="messages",
+    title="Jais AI - ذكاء صناعي متقدم",
+    description="""🤖 ذكاء صناعي متقدم يدعم اللغتين العربية والإنجليزية
 ✨ المميزات:
+- 🌐 دعم ثنائي اللغة (عربي وإنجليزي)
+- 🧠 موديل Jais المتقدم من Inception
+- 💬 إجابات ذكية واحترافية
+- 🎯 مدعوم بـ موديل 13B مع تحسينات الأداء
+احجي مع ذكاء Jais الاصطناعي في أي موضوع تريده.""",
     fill_height=True,
     textbox=gr.Textbox(
         label="اكتب رسالتك هنا",

test_iraqi_model.py CHANGED Viewed

@@ -8,10 +8,10 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 def test_model():
-    model_path = "anaspro/iraqi-7b"
     hf_token = os.getenv("HF_TOKEN")
-    print("جاري تحميل المودل العراقي...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_path,
         token=hf_token,

 from transformers import AutoTokenizer, AutoModelForCausalLM
 def test_model():
+    model_path = "inceptionai/jais-family-13b-chat"
     hf_token = os.getenv("HF_TOKEN")
+    print("جاري تحميل مودل Jais...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_path,
         token=hf_token,

test_jais.py ADDED Viewed

	@@ -0,0 +1,54 @@

+#!/usr/bin/env python3
+"""
+اختبار مودل Jais - مثل الكود الأصلي
+"""
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+def test_jais():
+    """Load the Jais chat model and print its replies to one Arabic and one English question."""
+    model_path = "inceptionai/jais-family-13b-chat"
+    # Load the model the same way as the original code
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    # NOTE(review): trust_remote_code=True presumably lets transformers run model code shipped in the hub repo — confirm this is intended.
+    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)
+    # The original prompts: each template has a single {Question} placeholder and ends with the "### Response :" marker
+    prompt_eng = "### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n[|AI|]\n### Response :"
+    prompt_ar = "### Instruction:اسمك \"جيس\" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :\n### Input:[|Human|] {Question}\n[|AI|]\n### Response :"
+    def get_response(text):
+        """Generate a reply for the already-formatted prompt ``text`` and return what follows the last '### Response :' marker."""
+        input_ids = tokenizer(text, return_tensors="pt").input_ids
+        # Inputs go to "cuda" when available, otherwise "cpu"; the model itself was placed via device_map="auto".
+        inputs = input_ids.to("cuda" if torch.cuda.is_available() else "cpu")
+        input_len = inputs.shape[-1]
+        generate_ids = model.generate(
+            inputs,
+            top_p=0.9,
+            temperature=0.3,
+            max_length=2048,
+            # NOTE(review): presumably max_length/min_length count prompt tokens too, so this asks for at least 4 new tokens — confirm against the transformers generate() docs.
+            min_length=input_len + 4,
+            repetition_penalty=1.2,
+            do_sample=True,
+        )
+        # Only the first decoded sequence is used.
+        response = tokenizer.batch_decode(
+            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
+        )[0]
+        # Keep only the text after the last response marker; if the marker is absent the whole decoded string is returned.
+        response = response.split("### Response :")[-1]
+        return response
+    # Arabic test
+    ques = "ما هي عاصمة الامارات؟"
+    text = prompt_ar.format_map({'Question': ques})
+    print("السؤال العربي:", ques)
+    print("الرد:", get_response(text))
+    print()
+    # English test
+    ques = "What is the capital of UAE?"
+    text = prompt_eng.format_map({'Question': ques})
+    print("السؤال الإنجليزي:", ques)
+    print("الرد:", get_response(text))
+if __name__ == "__main__":
+    test_jais()