anaspro committed on
Commit
afb7b1a
·
1 Parent(s): 8038e28
Files changed (1) hide show
  1. app.py +6 -10
app.py CHANGED
@@ -23,17 +23,15 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
23
  # إذا كان فيه HF_TOKEN في البيئة
24
  hf_token = os.getenv("HF_TOKEN")
25
 
26
- # Initialize pipeline for chat
27
- # ✅ استخدم image-text-to-text حتى لو text-only
28
  pipeline_model = pipeline(
29
- "image-text-to-text", # ✅ الصحيح لـ Gemma 3
30
  model=model_path,
31
  device=0, # Use GPU device directly
32
- torch_dtype=torch.bfloat16,
33
  token=hf_token,
34
  trust_remote_code=True,
35
  model_kwargs={
36
- "torch_dtype": torch.bfloat16,
37
  "load_in_4bit": True,
38
  "bnb_4bit_compute_dtype": torch.bfloat16,
39
  "bnb_4bit_use_double_quant": False,
@@ -114,8 +112,6 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
114
 
115
  # Debug: print messages structure
116
  print(f"Messages sent to model: {len(messages)} messages")
117
- for i, msg in enumerate(messages):
118
- print(f" {i}: {msg['role']}: {msg['content'][:50]}...")
119
 
120
  # Generate response
121
  response = generate_with_pipeline(
@@ -144,10 +140,10 @@ demo = gr.ChatInterface(
144
  fn=generate_response,
145
  additional_inputs=[
146
  gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
147
- gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0), # ✅ Gemma يفضل 1.0
148
  gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
149
- gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64), # ✅ Gemma يفضل 64
150
- gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0) # ✅ 1.0 = disabled
151
  ],
152
  examples=[
153
  ["النت عندي معطل من الصبح، تقدر تساعدني؟"],
 
23
  # إذا كان فيه HF_TOKEN في البيئة
24
  hf_token = os.getenv("HF_TOKEN")
25
 
26
+ # Initialize pipeline for chat - إصلاح مشكلة torch_dtype
 
27
  pipeline_model = pipeline(
28
+ "image-text-to-text",
29
  model=model_path,
30
  device=0, # Use GPU device directly
 
31
  token=hf_token,
32
  trust_remote_code=True,
33
  model_kwargs={
34
+ "torch_dtype": torch.bfloat16, # ✅ فقط هنا
35
  "load_in_4bit": True,
36
  "bnb_4bit_compute_dtype": torch.bfloat16,
37
  "bnb_4bit_use_double_quant": False,
 
112
 
113
  # Debug: print messages structure
114
  print(f"Messages sent to model: {len(messages)} messages")
 
 
115
 
116
  # Generate response
117
  response = generate_with_pipeline(
 
140
  fn=generate_response,
141
  additional_inputs=[
142
  gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
143
+ gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
144
  gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
145
+ gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),
146
+ gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
147
  ],
148
  examples=[
149
  ["النت عندي معطل من الصبح، تقدر تساعدني؟"],