anaspro committed
Commit 8038e28 · 1 Parent(s): 154d3ef

Files changed (1)
  1. app.py +31 -11

app.py CHANGED
@@ -24,9 +24,9 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
 hf_token = os.getenv("HF_TOKEN")
 
 # Initialize pipeline for chat
-# For quantized models, use device=0 instead of device_map="auto" to avoid meta tensor issues
+# Use the image-text-to-text task even for text-only input
 pipeline_model = pipeline(
-    "text-generation",
+    "image-text-to-text",  # ✅ the correct task for Gemma 3
     model=model_path,
     device=0,  # Use GPU device directly
     torch_dtype=torch.bfloat16,
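The task switch matters because the multimodal Gemma 3 checkpoints load a vision-language model class that the text-generation pipeline does not serve. Below is a hypothetical smoke test, not part of this commit; it assumes Transformers' image-text-to-text pipeline accepts text-only chat messages via its text= argument, which is how the app relies on it here.

    # Hypothetical smoke test (assumption: text-only chat input is accepted).
    import torch
    from transformers import pipeline

    pipe = pipeline(
        "image-text-to-text",
        model="unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
        device=0,
        torch_dtype=torch.bfloat16,
    )
    # Chat-format message with typed content, as the multimodal pipelines expect.
    messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
    out = pipe(text=messages, max_new_tokens=16, return_full_text=False)
    print(out[0]["generated_text"])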
@@ -59,7 +59,6 @@ def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=
         repetition_penalty=repetition_penalty,
         do_sample=True,
         return_full_text=False,
-        # 🆕 Add stop tokens for Gemma
         eos_token_id=pipeline_model.tokenizer.eos_token_id,
     )
     return outputs[0]["generated_text"]
@@ -76,15 +75,31 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         max_new_tokens, temperature, top_p, top_k, repetition_penalty: Generation parameters
     """
     try:
-        # Build messages list starting with system prompt
-        messages = [{"role": "user", "content": DEFAULT_SYSTEM_PROMPT}]
+        # Build messages list with system prompt as first user message
+        messages = []
+
+        # ✅ System prompt as first user message + model acknowledgment
+        messages.append({
+            "role": "user",
+            "content": DEFAULT_SYSTEM_PROMPT
+        })
+        messages.append({
+            "role": "model",  # ✅ Gemma 3 uses "model", not "assistant"
+            "content": "Understood. I will follow these instructions."
+        })
 
         # Add conversation history
-        # When type="messages", history is a list of message dicts with 'role' and 'content'
         if history:
             for msg in history:
                 if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
-                    messages.append({"role": msg['role'], "content": msg['content']})
+                    # Convert assistant → model if needed
+                    role = msg['role']
+                    if role == 'assistant':
+                        role = 'model'
+                    messages.append({
+                        "role": role,
+                        "content": msg['content']
+                    })
 
         # Add current user message
         if isinstance(message, dict):
@@ -92,10 +107,15 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         else:
             current_message = str(message)
 
-        messages.append({"role": "user", "content": current_message})
+        messages.append({
+            "role": "user",
+            "content": current_message
+        })
 
         # Debug: print messages structure
         print(f"Messages sent to model: {len(messages)} messages")
+        for i, msg in enumerate(messages):
+            print(f"  {i}: {msg['role']}: {msg['content'][:50]}...")
 
         # Generate response
         response = generate_with_pipeline(
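The message assembly added above could be factored into a single helper. A minimal sketch, fully mirroring the commit's logic (the name build_gemma_messages is hypothetical, not in this commit), assuming Gradio type="messages" history, i.e. a list of {'role', 'content'} dicts:

    def build_gemma_messages(history, current_message, system_prompt):
        """Build a Gemma 3 chat list: system prompt primed as a user/model
        exchange, then history with 'assistant' mapped to 'model'."""
        messages = [
            {"role": "user", "content": system_prompt},
            {"role": "model", "content": "Understood. I will follow these instructions."},
        ]
        for msg in history or []:
            if isinstance(msg, dict) and "role" in msg and "content" in msg:
                # Gemma 3 chat templates name the assistant role "model".
                role = "model" if msg["role"] == "assistant" else msg["role"]
                messages.append({"role": role, "content": msg["content"]})
        messages.append({"role": "user", "content": current_message})
        return messages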
@@ -124,10 +144,10 @@ demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
         gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
-        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
+        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),  # ✅ Gemma prefers 1.0
         gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
-        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40),
-        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
+        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),  # ✅ Gemma prefers 64
+        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)  # ✅ 1.0 = disabled
     ],
     examples=[
         ["النت عندي معطل من الصبح، تقدر تساعدني؟"],
 