anaspro
committed on
Commit
·
51d3416
1
Parent(s):
24d5388
updatE
Browse files- README.md +6 -6
- app.py +40 -73
- test_iraqi_model.py +2 -2
- test_jais.py +54 -0
README.md
CHANGED
|
@@ -8,7 +8,7 @@ sdk_version: 5.42.0
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
models:
|
| 11 |
-
-
|
| 12 |
tags:
|
| 13 |
- customer-support
|
| 14 |
- arabic
|
|
@@ -18,13 +18,13 @@ tags:
|
|
| 18 |
- multilingual
|
| 19 |
---
|
| 20 |
|
| 21 |
-
ذكاء صناعي
|
| 22 |
|
| 23 |
🚀 **المميزات:**
|
| 24 |
-
-
|
| 25 |
-
- 🧠
|
| 26 |
-
- 💬
|
| 27 |
-
- 🎯 مدعوم بـ موديل
|
| 28 |
|
| 29 |
📞 احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.
|
| 30 |
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
models:
|
| 11 |
+
- inceptionai/jais-family-13b-chat
|
| 12 |
tags:
|
| 13 |
- customer-support
|
| 14 |
- arabic
|
|
|
|
| 18 |
- multilingual
|
| 19 |
---
|
| 20 |
|
| 21 |
+
ذكاء اصطناعي متقدم يدعم اللغتين العربية والإنجليزية - Jais AI.
|
| 22 |
|
| 23 |
🚀 **المميزات:**
|
| 24 |
+
- 🌐 دعم ثنائي اللغة (عربي وإنجليزي)
|
| 25 |
+
- 🧠 موديل Jais المتقدم من Inception
|
| 26 |
+
- 💬 إجابات ذكية واحترافية
|
| 27 |
+
- 🎯 مدعوم بـ موديل 13B مع تحسينات الأداء
|
| 28 |
|
| 29 |
📞 احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.
|
| 30 |
|
app.py
CHANGED
|
@@ -8,9 +8,9 @@ import spaces
|
|
| 8 |
|
| 9 |
model_path = "inceptionai/jais-family-13b-chat"
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
# إذا كان فيه HF_TOKEN في البيئة
|
| 16 |
hf_token = os.getenv("HF_TOKEN")
|
|
@@ -39,36 +39,24 @@ if tokenizer.pad_token is None:
|
|
| 39 |
tokenizer.pad_token = tokenizer.eos_token
|
| 40 |
|
| 41 |
def get_response(text, tokenizer=tokenizer, model=model):
|
| 42 |
-
"""نفس الدالة من
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
input_len =
|
| 46 |
generate_ids = model.generate(
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
temperature=0.2,
|
| 52 |
-
max_length=input_len + 256, # Limit response length to prevent multiple responses
|
| 53 |
min_length=input_len + 4,
|
| 54 |
-
repetition_penalty=1.
|
| 55 |
do_sample=True,
|
| 56 |
-
pad_token_id=tokenizer.pad_token_id
|
| 57 |
-
eos_token_id=tokenizer.eos_token_id # Stop at end of sentence
|
| 58 |
)
|
| 59 |
response = tokenizer.batch_decode(
|
| 60 |
generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
|
| 61 |
)[0]
|
| 62 |
-
response = response.split("### Response :")[-1]
|
| 63 |
-
|
| 64 |
-
# Extract only the first AI response to prevent multiple responses
|
| 65 |
-
if "[|AI|]" in response and "[|Human|]" in response:
|
| 66 |
-
# If there are multiple turns, take only the first AI response
|
| 67 |
-
response = response.split("[|Human|]")[0].strip()
|
| 68 |
-
elif "[|AI|]" in response:
|
| 69 |
-
# Remove the [|AI|] marker from the beginning
|
| 70 |
-
response = response.replace("[|AI|]", "").strip()
|
| 71 |
-
|
| 72 |
return response
|
| 73 |
|
| 74 |
def format_conversation_history(chat_history):
|
|
@@ -94,49 +82,28 @@ def detect_language(text):
|
|
| 94 |
|
| 95 |
@spaces.GPU()
|
| 96 |
def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
|
| 97 |
-
#
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
conversation_parts.append(f"[|Human|] {content}")
|
| 110 |
-
elif role == "assistant":
|
| 111 |
-
conversation_parts.append(f"[|AI|] {content}")
|
| 112 |
-
|
| 113 |
-
# Add current user message
|
| 114 |
-
conversation_parts.append(f"[|Human|] {input_data}")
|
| 115 |
-
|
| 116 |
-
# Join conversation
|
| 117 |
-
conversation = "\n".join(conversation_parts)
|
| 118 |
-
|
| 119 |
-
# Create full prompt using the Iraqi Arabic prompt template
|
| 120 |
-
full_prompt = prompt_ar.format(Question=conversation)
|
| 121 |
|
| 122 |
try:
|
| 123 |
-
|
| 124 |
-
response = get_response(full_prompt)
|
| 125 |
-
|
| 126 |
-
# استخراج الرد الجديد فقط (بعد "### Response :")
|
| 127 |
-
if "### Response :" in response:
|
| 128 |
-
response = response.split("### Response :")[-1].strip()
|
| 129 |
-
|
| 130 |
-
if not response:
|
| 131 |
-
response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
|
| 132 |
-
|
| 133 |
yield response
|
| 134 |
|
| 135 |
except Exception as e:
|
| 136 |
print(f"Error in generate_response: {e}")
|
| 137 |
import traceback
|
| 138 |
print(traceback.format_exc())
|
| 139 |
-
yield "
|
| 140 |
|
| 141 |
demo = gr.ChatInterface(
|
| 142 |
fn=generate_response,
|
|
@@ -148,24 +115,24 @@ demo = gr.ChatInterface(
|
|
| 148 |
gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
|
| 149 |
],
|
| 150 |
examples=[
|
| 151 |
-
[{"text": "
|
| 152 |
-
[{"text": "
|
| 153 |
-
[{"text": "
|
| 154 |
-
[{"text": "
|
| 155 |
-
[{"text": "
|
| 156 |
],
|
| 157 |
cache_examples=False,
|
| 158 |
type="messages",
|
| 159 |
-
title="
|
| 160 |
-
description="""🤖 ذكاء صناعي
|
| 161 |
|
| 162 |
✨ المميزات:
|
| 163 |
-
-
|
| 164 |
-
- 🧠
|
| 165 |
-
- 💬
|
| 166 |
-
- 🎯 مدعوم بـ موديل
|
| 167 |
|
| 168 |
-
احجي مع
|
| 169 |
fill_height=True,
|
| 170 |
textbox=gr.Textbox(
|
| 171 |
label="اكتب رسالتك هنا",
|
|
|
|
| 8 |
|
| 9 |
model_path = "inceptionai/jais-family-13b-chat"
|
| 10 |
|
| 11 |
+
# Jais original prompts (مثل الكود الأصلي)
|
| 12 |
+
prompt_eng = "### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n[|AI|]\n### Response :"
|
| 13 |
+
prompt_ar = "### Instruction:اسمك \"جيس\" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :\n### Input:[|Human|] {Question}\n[|AI|]\n### Response :"
|
| 14 |
|
| 15 |
# إذا كان فيه HF_TOKEN في البيئة
|
| 16 |
hf_token = os.getenv("HF_TOKEN")
|
|
|
|
| 39 |
tokenizer.pad_token = tokenizer.eos_token
|
| 40 |
|
| 41 |
def get_response(text, tokenizer=tokenizer, model=model):
    """Generate a reply for an already-formatted Jais prompt.

    Args:
        text: Full prompt string. The reply is extracted by splitting on the
            "### Response :" marker, so the prompt is expected to contain it.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Causal language model whose ``generate`` method is called.

    Returns:
        The decoded text following the last "### Response :" marker, with
        leading and trailing whitespace removed.
    """
    encoded = tokenizer(text, return_tensors="pt")
    inputs = encoded.input_ids.to(device)
    input_len = inputs.shape[-1]

    # pad_token may have been aliased to eos_token when the tokenizer had no
    # pad token, so pass the tokenizer's own mask instead of letting
    # generate() guess it from the pad id.
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generate_ids = model.generate(
        inputs,
        attention_mask=attention_mask,
        top_p=0.9,
        temperature=0.3,
        max_length=2048,
        min_length=input_len + 4,  # require a few tokens beyond the prompt
        repetition_penalty=1.2,
        do_sample=True,
        pad_token_id=pad_token_id,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    # The decoded sequence still contains the prompt; keep only the reply and
    # trim the whitespace that follows the marker.
    return response.split("### Response :")[-1].strip()
|
| 61 |
|
| 62 |
def format_conversation_history(chat_history):
|
|
|
|
| 82 |
|
| 83 |
@spaces.GPU()
def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
    """Answer a single chat message with the Jais prompt templates.

    Args:
        input_data: The user's message, either a plain string or a dict
            carrying the message under the "text" key (the shape used by the
            examples passed to the chat interface).
        chat_history: Previous turns. Currently unused: each message is
            answered on its own.
        max_new_tokens, temperature, top_p, top_k, repetition_penalty:
            Values from the UI controls.
            NOTE(review): these are not forwarded to ``get_response``, which
            uses fixed sampling settings - confirm whether that is intended.

    Yields:
        The model's reply, or a fixed apology message if anything fails.
    """
    # NOTE(review): a module-level detect_language appears to exist as well;
    # this local definition shadows it - confirm and deduplicate.
    def detect_language(text):
        """Return 'ar' when more than 30% of non-space characters are Arabic."""
        arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
        total_chars = len(text.replace(' ', ''))
        if total_chars == 0:
            return 'ar'
        arabic_ratio = arabic_chars / total_chars
        return 'ar' if arabic_ratio > 0.3 else 'en'

    try:
        # Accept both a bare string and a {"text": ...} payload so that a dict
        # message does not crash language detection.
        if isinstance(input_data, dict):
            ques = input_data.get("text") or ""
        else:
            ques = input_data or ""

        # Prompt construction lives inside the try block so that any failure
        # here is reported through the same friendly error message.
        template = prompt_ar if detect_language(ques) == 'ar' else prompt_eng
        text = template.format_map({'Question': ques})

        response = get_response(text)
        yield response

    except Exception as e:
        print(f"Error in generate_response: {e}")
        import traceback
        print(traceback.format_exc())
        yield "أعتذر، حدث خطأ. يرجى المحاولة مرة أخرى."
|
| 107 |
|
| 108 |
demo = gr.ChatInterface(
|
| 109 |
fn=generate_response,
|
|
|
|
| 115 |
gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
|
| 116 |
],
|
| 117 |
examples=[
|
| 118 |
+
[{"text": "ما هي عاصمة الامارات؟"}],
|
| 119 |
+
[{"text": "شرح لي الذكاء الاصطناعي"}],
|
| 120 |
+
[{"text": "أخبرني قصة قصيرة"}],
|
| 121 |
+
[{"text": "كيف أتعلم البرمجة؟"}],
|
| 122 |
+
[{"text": "What is the capital of UAE?"}],
|
| 123 |
],
|
| 124 |
cache_examples=False,
|
| 125 |
type="messages",
|
| 126 |
+
title="Jais AI - ذكاء صناعي متقدم",
|
| 127 |
+
description="""🤖 ذكاء صناعي متقدم يدعم اللغتين العربية والإنجليزية
|
| 128 |
|
| 129 |
✨ المميزات:
|
| 130 |
+
- 🌐 دعم ثنائي اللغة (عربي وإنجليزي)
|
| 131 |
+
- 🧠 موديل Jais المتقدم من Inception
|
| 132 |
+
- 💬 إجابات ذكية واحترافية
|
| 133 |
+
- 🎯 مدعوم بـ موديل 13B مع تحسينات الأداء
|
| 134 |
|
| 135 |
+
احجي مع ذكاء Jais الاصطناعي في أي موضوع تريده.""",
|
| 136 |
fill_height=True,
|
| 137 |
textbox=gr.Textbox(
|
| 138 |
label="اكتب رسالتك هنا",
|
test_iraqi_model.py
CHANGED
|
@@ -8,10 +8,10 @@ import torch
|
|
| 8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
|
| 10 |
def test_model():
|
| 11 |
-
model_path = "
|
| 12 |
hf_token = os.getenv("HF_TOKEN")
|
| 13 |
|
| 14 |
-
print("جاري تحميل
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
model_path,
|
| 17 |
token=hf_token,
|
|
|
|
| 8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
|
| 10 |
def test_model():
|
| 11 |
+
model_path = "inceptionai/jais-family-13b-chat"
|
| 12 |
hf_token = os.getenv("HF_TOKEN")
|
| 13 |
|
| 14 |
+
print("جاري تحميل مودل Jais...")
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 16 |
model_path,
|
| 17 |
token=hf_token,
|
test_jais.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
اختبار مودل Jais - مثل الكود الأصلي
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
+
|
| 10 |
+
def test_jais():
    """Smoke-test the Jais chat model with one Arabic and one English question.

    Loads the tokenizer and model, formats each question with the matching
    prompt template, and prints the question and the generated reply.
    """
    model_path = "inceptionai/jais-family-13b-chat"
    # Forward the optional access token, as the sibling test script does;
    # when HF_TOKEN is unset this is None, which matches the default.
    hf_token = os.getenv("HF_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_path, device_map="auto", trust_remote_code=True, token=hf_token
    )

    # Original Jais prompt templates.
    prompt_eng = "### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n[|AI|]\n### Response :"
    prompt_ar = "### Instruction:اسمك \"جيس\" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :\n### Input:[|Human|] {Question}\n[|AI|]\n### Response :"

    def get_response(text):
        """Generate and return the text after the "### Response :" marker."""
        encoded = tokenizer(text, return_tensors="pt")
        # With device_map="auto" the model is not necessarily on "cuda";
        # send the inputs to the device the model reports.
        inputs = encoded.input_ids.to(model.device)
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(model.device)
        input_len = inputs.shape[-1]
        generate_ids = model.generate(
            inputs,
            attention_mask=attention_mask,
            top_p=0.9,
            temperature=0.3,
            max_length=2048,
            min_length=input_len + 4,
            repetition_penalty=1.2,
            do_sample=True,
        )
        response = tokenizer.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )[0]
        response = response.split("### Response :")[-1]
        return response

    # Arabic test
    ques = "ما هي عاصمة الامارات؟"
    text = prompt_ar.format_map({'Question': ques})
    print("السؤال العربي:", ques)
    print("الرد:", get_response(text))
    print()

    # English test
    ques = "What is the capital of UAE?"
    text = prompt_eng.format_map({'Question': ques})
    print("السؤال الإنجليزي:", ques)
    print("الرد:", get_response(text))
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
test_jais()
|