anaspro committed
Commit 8038e28 · 1 Parent(s): 154d3ef

Files changed (1)
  1. app.py +31 -11

app.py CHANGED
@@ -24,9 +24,9 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
 hf_token = os.getenv("HF_TOKEN")
 
 # Initialize pipeline for chat
-# For quantized models, use device=0 instead of device_map="auto" to avoid meta tensor issues
+# Use the image-text-to-text task even for text-only input
 pipeline_model = pipeline(
-    "text-generation",
+    "image-text-to-text",  # ✅ the correct task for Gemma 3
     model=model_path,
     device=0,  # Use GPU device directly
     torch_dtype=torch.bfloat16,
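The task switch matters because the multimodal Gemma 3 checkpoints load a vision-language model class that the text-generation pipeline does not serve. Below is a hypothetical smoke test, not part of this commit; it assumes Transformers' image-text-to-text pipeline accepts text-only chat messages via its text= argument, which is how the app relies on it here.

    # Hypothetical smoke test (assumption: text-only chat input is accepted).
    import torch
    from transformers import pipeline

    pipe = pipeline(
        "image-text-to-text",
        model="unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
        device=0,
        torch_dtype=torch.bfloat16,
    )
    # Chat-format message with typed content, as the multimodal pipelines expect.
    messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}]
    out = pipe(text=messages, max_new_tokens=16, return_full_text=False)
    print(out[0]["generated_text"])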
@@ -59,7 +59,6 @@ def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=
         repetition_penalty=repetition_penalty,
         do_sample=True,
         return_full_text=False,
-        # 🆕 Add stop tokens for Gemma
         eos_token_id=pipeline_model.tokenizer.eos_token_id,
     )
     return outputs[0]["generated_text"]
@@ -76,15 +75,31 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         max_new_tokens, temperature, top_p, top_k, repetition_penalty: Generation parameters
     """
     try:
-        # Build messages list starting with system prompt
-        messages = [{"role": "user", "content": DEFAULT_SYSTEM_PROMPT}]
+        # Build messages list with system prompt as first user message
+        messages = []
+
+        # ✅ System prompt as first user message + model acknowledgment
+        messages.append({
+            "role": "user",
+            "content": DEFAULT_SYSTEM_PROMPT
+        })
+        messages.append({
+            "role": "model",  # ✅ Gemma 3 uses "model", not "assistant"
+            "content": "Understood. I will follow these instructions."
+        })
 
         # Add conversation history
-        # When type="messages", history is a list of message dicts with 'role' and 'content'
         if history:
             for msg in history:
                 if isinstance(msg, dict) and 'role' in msg and 'content' in msg:
-                    messages.append({"role": msg['role'], "content": msg['content']})
+                    # Convert assistant → model if needed
+                    role = msg['role']
+                    if role == 'assistant':
+                        role = 'model'
+                    messages.append({
+                        "role": role,
+                        "content": msg['content']
+                    })
 
         # Add current user message
         if isinstance(message, dict):
@@ -92,10 +107,15 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
         else:
             current_message = str(message)
 
-        messages.append({"role": "user", "content": current_message})
+        messages.append({
+            "role": "user",
+            "content": current_message
+        })
 
         # Debug: print messages structure
         print(f"Messages sent to model: {len(messages)} messages")
+        for i, msg in enumerate(messages):
+            print(f"  {i}: {msg['role']}: {msg['content'][:50]}...")
 
         # Generate response
         response = generate_with_pipeline(
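The message assembly added above could be factored into a single helper. A minimal sketch, fully mirroring the commit's logic (the name build_gemma_messages is hypothetical, not in this commit), assuming Gradio type="messages" history, i.e. a list of {'role', 'content'} dicts:

    def build_gemma_messages(history, current_message, system_prompt):
        """Build a Gemma 3 chat list: system prompt primed as a user/model
        exchange, then history with 'assistant' mapped to 'model'."""
        messages = [
            {"role": "user", "content": system_prompt},
            {"role": "model", "content": "Understood. I will follow these instructions."},
        ]
        for msg in history or []:
            if isinstance(msg, dict) and "role" in msg and "content" in msg:
                # Gemma 3 chat templates name the assistant role "model".
                role = "model" if msg["role"] == "assistant" else msg["role"]
                messages.append({"role": role, "content": msg["content"]})
        messages.append({"role": "user", "content": current_message})
        return messages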
@@ -124,10 +144,10 @@ demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
         gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
-        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=0.7),
+        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),  # ✅ Gemma prefers 1.0
         gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
-        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=40),
-        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.1)
+        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),  # ✅ Gemma prefers 64
+        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)  # ✅ 1.0 = disabled
     ],
     examples=[
         ["النت عندي معطل من الصبح، تقدر تساعدني؟"],
 