anaspro committed
Commit 2c062aa · 1 Parent(s): daf6e69
Files changed (2)
  1. app.py +81 -33
  2. app2.py +103 -93
app.py CHANGED
@@ -18,6 +18,63 @@ DEFAULT_SYSTEM_PROMPT = load_system_prompt()
 
 model_path = "inceptionai/jais-adapted-7b-chat"
 
+# Gemma-3 chat template for compatibility
+GEMMA_CHAT_TEMPLATE = "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% set messages = messages[1:] %}{% else %}{% set system_message = false %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{% if loop.first and system_message %}{{ '<start_of_turn>' + role + '\n' + system_message + '\n\n' + message['content'] | trim + '<end_of_turn>\n' }}{% else %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<start_of_turn>model\n' }}{% endif %}"
+
+def apply_gemma_template(messages, add_generation_prompt=True):
+    """Apply Gemma-3 chat template for models based on Gemma-3"""
+    try:
+        # Try to use tokenizer's built-in template first
+        if hasattr(tokenizer, 'apply_chat_template'):
+            return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=add_generation_prompt)
+
+        # Manual implementation based on the template
+        result = tokenizer.bos_token or ""
+
+        system_message = None
+        if messages and messages[0]['role'] == 'system':
+            system_message = messages[0]['content']
+            messages = messages[1:]
+
+        for i, message in enumerate(messages):
+            if message['role'] == 'assistant':
+                role = 'model'
+            else:
+                role = message['role']
+
+            result += f"<start_of_turn>{role}\n"
+
+            if i == 0 and system_message:
+                result += f"{system_message}\n\n"
+
+            if isinstance(message['content'], str):
+                result += message['content'].strip()
+            elif isinstance(message['content'], list):
+                for item in message['content']:
+                    if item.get('type') == 'text':
+                        result += item['text'].strip()
+
+            result += "<end_of_turn>\n"
+
+        if add_generation_prompt:
+            result += "<start_of_turn>model\n"
+
+        return result
+
+    except Exception as e:
+        print(f"Error in Gemma template: {e}")
+        # Fallback
+        prompt = ""
+        for msg in messages:
+            if msg["role"] == "system":
+                prompt += f"System: {msg['content']}\n"
+            elif msg["role"] == "user":
+                prompt += f"Human: {msg['content']}\n"
+            elif msg["role"] == "assistant":
+                prompt += f"Assistant: {msg['content']}\n"
+        prompt += "Assistant:"
+        return prompt
+
 # Jais chat prompts from documentation
 prompt_eng = """### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:
 ### Input: [|Human|] {Question}
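For reference (not part of the commit): a minimal standalone sketch of the prompt shape the manual branch of apply_gemma_template builds. The helper name and the message list below are invented for illustration.

def render_gemma(messages, add_generation_prompt=True):
    # Reduced copy of the manual branch above, without the tokenizer dependency
    system = None
    if messages and messages[0]["role"] == "system":
        system, messages = messages[0]["content"], messages[1:]
    out = ""
    for i, m in enumerate(messages):
        role = "model" if m["role"] == "assistant" else m["role"]
        out += f"<start_of_turn>{role}\n"
        if i == 0 and system:
            out += f"{system}\n\n"
        out += m["content"].strip() + "<end_of_turn>\n"
    if add_generation_prompt:
        out += "<start_of_turn>model\n"
    return out

print(render_gemma([
    {"role": "system", "content": "You are Alex, a support agent."},
    {"role": "user", "content": "My router keeps rebooting."},
]))
# <start_of_turn>user
# You are Alex, a support agent.
#
# My router keeps rebooting.<end_of_turn>
# <start_of_turn>model

As in the Jinja template, the system message is folded into the first user turn rather than getting a turn of its own.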
@@ -85,50 +142,41 @@ def detect_language(text):
 
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
-    # Detect language of the current question
-    lang = detect_language(input_data)
-    prompt_template = prompt_ar if lang == 'ar' else prompt_eng
-
-    # Build conversation for Jais format
-    conversation_parts = []
-
-    # Add system prompt as part of the instruction (keep it short for Jais)
-    system_instruction = "اسمك \"أليكس\" وأنت مساعد خدمة العملاء في شركة TechSolutions. مهمتك مساعدة العملاء في حل مشاكلهم مع المنتجات والإجابة عن أسئلتهم حول الخدمات. كن ودوداً وصبوراً ومحترماً. أجب بالعربية أو الإنجليزية حسب تفضيل العميل. ابدأ بالتحية وكن مباشراً في الحلول."
-
-    # Add chat history
-    if chat_history:
-        for item in chat_history:
-            role = item["role"]
-            content = item["content"]
-            if isinstance(content, list):
-                content = content[0]["text"] if content and "text" in content[0] else str(content)
-
-            if role == "user":
-                conversation_parts.append(f"[|Human|] {content}")
-            elif role == "assistant":
-                conversation_parts.append(f"[|AI|] {content}")
-
-    # Add current user message
-    conversation_parts.append(f"[|Human|] {input_data}")
-    conversation_parts.append("[|AI|]")
-
-    # Join conversation
-    conversation = "\n".join(conversation_parts)
-
-    # Create full prompt using Jais format with our system prompt
-    full_prompt = f"### Instruction:{system_instruction}\n### Input:{conversation}\n### Response :"
-
-    try:
-        # Use the get_response function from the documentation
-        response = get_response(full_prompt)
-
-        # Extract only the new reply (after "### Response :")
-        if "### Response :" in response:
-            response = response.split("### Response :")[-1].strip()
+    try:
+        # Build messages for Gemma format
+        messages = []
+
+        if DEFAULT_SYSTEM_PROMPT:
+            messages.append({"role": "system", "content": DEFAULT_SYSTEM_PROMPT})
+
+        # Add chat history
+        if chat_history:
+            for item in chat_history:
+                role = item["role"]
+                content = item["content"]
+                if isinstance(content, list):
+                    content = content[0]["text"] if content and "text" in content[0] else str(content)
+                messages.append({"role": role, "content": content})
+
+        # Add current user input
+        messages.append({"role": "user", "content": input_data})
+
+        # Use Gemma template for the model
+        prompt = apply_gemma_template(messages)
+
+        print(f"Generated Gemma prompt: {prompt[:200]}...")  # Debug
+
+        # Use the get_response function with the Gemma prompt
+        response = get_response(prompt)
+
+        # Extract only the new reply (after the last <end_of_turn>)
+        if "<end_of_turn>" in response:
+            response = response.split("<end_of_turn>")[-1].strip()
 
         if not response:
             response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
 
+        print(f"Final response: {response[:100]}...")  # Debug
         yield response
 
     except Exception as e:
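A note on the extraction step (an observation, not a change): taking the text after the last <end_of_turn> leaves the <start_of_turn>model header in the reply, unless the tokenizer treats the turn markers as special tokens and drops them during decoding. A sketch of the split on an invented decoded string:

decoded = (
    "<start_of_turn>user\nHi<end_of_turn>\n"
    "<start_of_turn>model\nHello! How can I help?"
)
if "<end_of_turn>" in decoded:
    reply = decoded.split("<end_of_turn>")[-1].strip()
print(repr(reply))  # '<start_of_turn>model\nHello! How can I help?'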
 
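Taken together, the new generate_response is a generator that yields the finished reply once rather than streaming token by token; gr.ChatInterface accepts either. A usage sketch, assuming the Space's model and tokenizer are loaded (the history entries are invented):

history = [
    {"role": "user", "content": "مرحبا"},
    {"role": "assistant", "content": "أهلاً! كيف أقدر أساعدك؟"},
]
for chunk in generate_response("هل تدعمون الدفع الإلكتروني؟", history,
                               max_new_tokens=512, temperature=0.7,
                               top_p=0.9, top_k=50, repetition_penalty=1.1):
    print(chunk)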
app2.py CHANGED
@@ -1,7 +1,8 @@
+# -*- coding: utf-8 -*-
+
 import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
-from threading import Thread
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import spaces
 
@@ -15,74 +16,51 @@ def load_system_prompt():
 
 DEFAULT_SYSTEM_PROMPT = load_system_prompt()
 
-model_path = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+model_path = "inceptionai/jais-adapted-7b-chat"
+
+# Jais chat prompts from documentation
+prompt_eng = """### Instruction:Your name is 'Jais', and you are named after Jebel Jais, the highest mountain in UAE. You were made by 'Inception' in the UAE. You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Complete the conversation between [|Human|] and [|AI|]:
+### Input: [|Human|] {Question}
+[|AI|]
+### Response :"""
+
+prompt_ar = """### Instruction:اسمك "جيس" وسميت على اسم جبل جيس اعلى جبل في الامارات. تم بنائك بواسطة Inception في الإمارات. أنت مساعد مفيد ومحترم وصادق. أجب دائمًا بأكبر قدر ممكن من المساعدة، مع الحفاظ على البقاء أمناً. أكمل المحادثة بين [|Human|] و[|AI|] :
+### Input:[|Human|] {Question}
+[|AI|]
+### Response :"""
 
 # If HF_TOKEN is set in the environment
 hf_token = os.getenv("HF_TOKEN")
 
-# Use a ChatPipeline instead of plain text-generation
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto", token=hf_token)
-
-# Create a custom chat pipeline with streaming
-def create_chat_pipeline(tokenizer, model):
-    """Create a custom chat pipeline with chat template and streaming"""
-    def chat_generate(messages, streamer=None, **kwargs):
-        # Convert the messages using the chat template
-        if hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template is not None:
-            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        else:
-            # Fallback for models without a chat template
-            prompt = ""
-            for msg in messages:
-                if msg["role"] == "system":
-                    prompt += f"System: {msg['content']}\n"
-                elif msg["role"] == "user":
-                    prompt += f"Human: {msg['content']}\n"
-                elif msg["role"] == "assistant":
-                    prompt += f"Assistant: {msg['content']}\n"
-            prompt += "Assistant:"
-
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-        # Generate the reply with streaming if requested
-        if streamer:
-            generation_kwargs = {
-                **inputs,
-                "max_new_tokens": kwargs.get('max_new_tokens', 512),
-                "temperature": kwargs.get('temperature', 0.7),
-                "top_p": kwargs.get('top_p', 0.9),
-                "top_k": kwargs.get('top_k', 50),
-                "repetition_penalty": kwargs.get('repetition_penalty', 1.1),
-                "do_sample": True,
-                "pad_token_id": tokenizer.eos_token_id,
-                "streamer": streamer,
-            }
-
-            # Return the kwargs for the generation thread
-            return generation_kwargs
-        else:
-            # Plain generation without streaming
-            with torch.no_grad():
-                outputs = model.generate(
-                    **inputs,
-                    max_new_tokens=kwargs.get('max_new_tokens', 512),
-                    temperature=kwargs.get('temperature', 0.7),
-                    top_p=kwargs.get('top_p', 0.9),
-                    top_k=kwargs.get('top_k', 50),
-                    repetition_penalty=kwargs.get('repetition_penalty', 1.1),
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id,
-                    return_dict_in_generate=True,
-                    output_scores=False,
-                )
-
-            response = tokenizer.decode(outputs.sequences[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
-            return [{"generated_text": response}]
-
-    return chat_generate
-
-pipe = create_chat_pipeline(tokenizer, model)
+model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True, token=hf_token)
+
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+def get_response(text, tokenizer=tokenizer, model=model):
+    """Same function as in the documentation, adapted for the chat model"""
+    tokenized = tokenizer(text, return_tensors="pt")
+    input_ids, attention_mask = tokenized['input_ids'].to(device), tokenized['attention_mask'].to(device)
+    input_len = input_ids.shape[-1]
+    generate_ids = model.generate(
+        input_ids,
+        attention_mask=attention_mask,
+        top_p=0.9,
+        temperature=0.3,
+        max_length=2048,
+        min_length=input_len + 4,
+        repetition_penalty=1.2,
+        do_sample=True,
+        pad_token_id=tokenizer.pad_token_id
+    )
+    response = tokenizer.batch_decode(
+        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
+    )[0]
+    response = response.split("### Response :")[-1].lstrip()
+    return response
 
 def format_conversation_history(chat_history):
     messages = []
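For orientation (not in the commit): per the comments above, the templates and get_response follow the Jais documentation, and they compose by filling {Question} in a template and passing the result to get_response. A sketch assuming the model and tokenizer above are loaded; the questions are invented.

question = "What is the capital of UAE?"
print(get_response(prompt_eng.format(Question=question)))

question_ar = "ما هي عاصمة الامارات؟"
print(get_response(prompt_ar.format(Question=question_ar)))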
@@ -94,39 +72,71 @@ def format_conversation_history(chat_history):
         messages.append({"role": role, "content": content})
     return messages
 
+def detect_language(text):
+    """Simple language detection - Arabic vs English"""
+    arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
+    total_chars = len(text.replace(' ', ''))
+
+    if total_chars == 0:
+        return 'ar'  # default to Arabic
+
+    arabic_ratio = arabic_chars / total_chars
+    return 'ar' if arabic_ratio > 0.3 else 'en'
+
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
-    # Drop the whole history - always start a new conversation
-    # chat_history = []  # history disabled
-
-    new_message = {"role": "user", "content": input_data}
-
-    # Build messages for Llama chat template
-    messages = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
-    # Do not add the old chat_history
-
-    # Use the custom ChatPipeline with streaming
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-    generation_kwargs = pipe(
-        messages,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty
-    )
-
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    # Stream the response
-    response = ""
-    for chunk in streamer:
-        response += chunk
+    # Detect language of the current question
+    lang = detect_language(input_data)
+    prompt_template = prompt_ar if lang == 'ar' else prompt_eng
+
+    # Build conversation for Jais format
+    conversation_parts = []
+
+    # Add system prompt as part of the instruction (keep it short for Jais)
+    system_instruction = "اسمك \"أليكس\" وأنت مساعد خدمة العملاء في شركة TechSolutions. مهمتك مساعدة العملاء في حل مشاكلهم مع المنتجات والإجابة عن أسئلتهم حول الخدمات. كن ودوداً وصبوراً ومحترماً. أجب بالعربية أو الإنجليزية حسب تفضيل العميل. ابدأ بالتحية وكن مباشراً في الحلول."
+
+    # Add chat history
+    if chat_history:
+        for item in chat_history:
+            role = item["role"]
+            content = item["content"]
+            if isinstance(content, list):
+                content = content[0]["text"] if content and "text" in content[0] else str(content)
+
+            if role == "user":
+                conversation_parts.append(f"[|Human|] {content}")
+            elif role == "assistant":
+                conversation_parts.append(f"[|AI|] {content}")
+
+    # Add current user message
+    conversation_parts.append(f"[|Human|] {input_data}")
+    conversation_parts.append("[|AI|]")
+
+    # Join conversation
+    conversation = "\n".join(conversation_parts)
+
+    # Create full prompt using Jais format with our system prompt
+    full_prompt = f"### Instruction:{system_instruction}\n### Input:{conversation}\n### Response :"
+
+    try:
+        # Use the get_response function from the documentation
+        response = get_response(full_prompt)
+
+        # Extract only the new reply (after "### Response :")
+        if "### Response :" in response:
+            response = response.split("### Response :")[-1].strip()
+
+        if not response:
+            response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
+
         yield response
 
+    except Exception as e:
+        print(f"Error in generate_response: {e}")
+        import traceback
+        print(traceback.format_exc())
+        yield "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
+
 demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
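A quick standalone check (not in the commit) of the detect_language heuristic added above: anything over 30% Arabic characters routes to the Arabic prompt, and empty input defaults to Arabic.

def detect_language(text):
    # Copy of the heuristic above, for running in isolation
    arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
    total_chars = len(text.replace(' ', ''))
    if total_chars == 0:
        return 'ar'
    return 'ar' if arabic_chars / total_chars > 0.3 else 'en'

print(detect_language("مرحبا كيف حالك"))  # -> 'ar'
print(detect_language("hello there"))     # -> 'en'
print(detect_language(""))                # -> 'ar' (default)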
@@ -153,7 +163,7 @@ demo = gr.ChatInterface(
     - 💬 لهجة محادثة طبيعية
     - 🔧 دعم فني واستكشاف الأخطاء
     - 📋 معلومات الخدمات والإرشاد
-    - 🎯 مدعوم بـ موديل anaspro العراقي (Llama 3.1 محسن للعربية العراقية)
+    - 🎯 مدعوم بـ موديل Unsloth Meta-Llama-3.1-8B-Instruct (مع تحسينات الأداء)
 
     احجي مع أليكس لحل مشاكلك التقنية، استفسر عن الخدمات، أو احصل على معلومات المنتجات.""",
     fill_height=True,
 
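A closing note on the wiring: the additional_inputs list is elided from this diff, but whatever it holds must supply the five extra parameters of generate_response in order (max_new_tokens, temperature, top_p, top_k, repetition_penalty). A hypothetical sketch of such a list follows; the ranges and defaults are invented, and note that in this version get_response hard-codes its own sampling settings (temperature=0.3, top_p=0.9, repetition_penalty=1.2), so the UI values are accepted but not actually used.

import gradio as gr

additional_inputs = [
    gr.Slider(64, 2048, value=512, step=1, label="max_new_tokens"),
    gr.Slider(0.1, 1.5, value=0.7, label="temperature"),
    gr.Slider(0.1, 1.0, value=0.9, label="top_p"),
    gr.Slider(1, 100, value=50, step=1, label="top_k"),
    gr.Slider(1.0, 2.0, value=1.1, label="repetition_penalty"),
]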