anaspro committed
Commit · 8038e28 · 1 Parent(s): 154d3ef
upadte
app.py CHANGED
@@ -24,9 +24,9 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
 hf_token = os.getenv("HF_TOKEN")
 
 # Initialize pipeline for chat
-#
+# ✅ Use image-text-to-text even for text-only chat
 pipeline_model = pipeline(
-    "text-generation",
+    "image-text-to-text",  # ✅ the correct task for Gemma 3
     model=model_path,
     device=0,  # Use GPU device directly
     torch_dtype=torch.bfloat16,
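The 4B Gemma 3 checkpoint is multimodal, so its pipeline task resolves to image-text-to-text rather than text-generation, even when only text is passed. A minimal standalone sketch of the same call pattern (the prompt is illustrative, and a transformers version with Gemma 3 support is assumed):

```python
import torch
from transformers import pipeline

# Sketch: text-only chat through the "image-text-to-text" task.
# Same model id as app.py; the message content is illustrative.
pipe = pipeline(
    "image-text-to-text",
    model="unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    device=0,
    torch_dtype=torch.bfloat16,
)
messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
out = pipe(text=messages, max_new_tokens=64, return_full_text=False)
print(out[0]["generated_text"])
```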
@@ -59,7 +59,6 @@ def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=
         repetition_penalty=repetition_penalty,
         do_sample=True,
         return_full_text=False,
-        # 🆕 Add stop tokens for Gemma
         eos_token_id=pipeline_model.tokenizer.eos_token_id,
     )
     return outputs[0]["generated_text"]
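The removed comment referred to adding stop tokens; the hunk keeps only the tokenizer's EOS id. Gemma's chat format also ends each turn with the <end_of_turn> token, so if generation ever runs past a turn boundary, both terminators can be passed as a list. A sketch under that assumption, not part of this commit:

```python
# Sketch: stop on the model EOS and on Gemma's end-of-turn marker.
# Assumes pipeline_model and messages from app.py are in scope.
tok = pipeline_model.tokenizer
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<end_of_turn>")]
outputs = pipeline_model(
    text=messages,
    max_new_tokens=256,
    eos_token_id=terminators,  # generate() accepts a list of stop ids
    return_full_text=False,
)
```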
@@ -76,15 +75,31 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         max_new_tokens, temperature, top_p, top_k, repetition_penalty: Generation parameters
     """
     try:
-        # Build messages list
-        messages = [
+        # ✅ Build messages list with system prompt as first user message
+        messages = []
+
+        # ✅ System prompt as first user message + model acknowledgment
+        messages.append({
+            "role": "user",
+            "content": DEFAULT_SYSTEM_PROMPT
+        })
+        messages.append({
+            "role": "model",  # ✅ In Gemma 3 use "model", not "assistant"
+            "content": "Understood. I will follow these instructions."
+        })
 
         # Add conversation history
-        # When type="messages", history is a list of message dicts with 'role' and 'content'
         if history:
             for msg in history:
                 if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
-                    messages.append(msg)
+                    # ✅ Convert assistant → model if needed
+                    role = msg['role']
+                    if role == 'assistant':
+                        role = 'model'
+                    messages.append({
+                        "role": role,
+                        "content": msg['content']
+                    })
 
         # Add current user message
         if isinstance(message, dict):
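Per the commit's own comments, Gemma 3 chat uses "model" rather than "assistant" for the assistant side and takes the system prompt as a leading user/model exchange, which is why history roles are remapped. The same remapping, pulled out as a standalone helper for clarity (normalize_history is an illustrative name, not in app.py):

```python
# Sketch of the role mapping applied in the hunk above.
def normalize_history(history):
    """Map Gradio-style 'assistant' turns to Gemma 3's 'model' role."""
    normalized = []
    for msg in history or []:
        if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
            role = 'model' if msg['role'] == 'assistant' else msg['role']
            normalized.append({"role": role, "content": msg['content']})
    return normalized

# Usage: a Gradio history pair becomes Gemma-ready roles.
assert normalize_history(
    [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
) == [{"role": "user", "content": "hi"}, {"role": "model", "content": "hello"}]
```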
@@ -92,10 +107,15 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         else:
             current_message = str(message)
 
-        messages.append({"role": "user", "content": current_message})
+        messages.append({
+            "role": "user",
+            "content": current_message
+        })
 
         # Debug: print messages structure
         print(f"Messages sent to model: {len(messages)} messages")
+        for i, msg in enumerate(messages):
+            print(f"  {i}: {msg['role']}: {msg['content'][:50]}...")
 
         # Generate response
        response = generate_with_pipeline(
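One caveat with the new debug loop: msg['content'][:50] assumes string content. If multimodal-style content (a list of parts) ever reaches the history, slicing yields a list rather than a text preview; converting through str() first keeps the log to one line per message. A defensive variant, not in the commit:

```python
# Sketch: preview that tolerates both string and list-of-parts content.
for i, msg in enumerate(messages):
    preview = str(msg['content'])[:50]
    print(f"  {i}: {msg['role']}: {preview}...")
```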
@@ -124,10 +144,10 @@ demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
         gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
-        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
+        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),  # ✅ Gemma prefers 1.0
         gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
-        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=
-        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.1),
+        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),  # ✅ Gemma prefers 64
+        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)  # ✅ 1.0 = disabled
     ],
     examples=[
         ["النت عندي معطل من الصبح، تقدر تساعدني؟"],
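The new slider defaults match the sampling settings the commit's comments recommend for Gemma 3 (temperature 1.0, top_p 0.95, top_k 64), with repetition_penalty at its neutral value. Collected as one set of kwargs for generate_with_pipeline (GEMMA3_SAMPLING is an illustrative name, not in app.py):

```python
# Sketch: the new UI defaults as generation kwargs. The keys match
# generate_with_pipeline's signature in app.py; messages is assumed in scope.
GEMMA3_SAMPLING = {
    "max_new_tokens": 2048,
    "temperature": 1.0,         # Gemma 3 preferred value per the commit
    "top_p": 0.95,
    "top_k": 64,                # Gemma 3 preferred value per the commit
    "repetition_penalty": 1.0,  # 1.0 = no penalty (disabled)
}
response = generate_with_pipeline(messages, **GEMMA3_SAMPLING)
```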