anaspro committed
Commit · 6da46a3 · 1 Parent(s): bd45f32
update

app.py CHANGED
@@ -96,6 +96,33 @@ def format_conversation_history(chat_history):
 
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
+    # Simple test first
+    try:
+        # A simple test message
+        test_prompt = "السلام عليكم"
+        inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
+
+        print(f"Input shape: {inputs.input_ids.shape}")  # Debug
+        print(f"Input tokens: {inputs.input_ids[0][:10]}")  # Debug
+
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=50,  # short, for testing
+                do_sample=False,
+                num_beams=1,
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+            )
+
+        test_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(f"Test response: {test_response}")  # Debug
+
+    except Exception as e:
+        print(f"Test failed: {e}")
+        import traceback
+        print(traceback.format_exc())
+
     # Build messages for Llama chat template
     messages = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
 
@@ -113,6 +140,7 @@ def generate_response(input_data, chat_history, max_new_tokens, temperature, top
         # Try to use the chat template
         if hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template is not None:
             prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+            print(f"Using chat template. Prompt length: {len(prompt)}")  # Debug
         else:
             # Fallback format
             prompt = f"System: {DEFAULT_SYSTEM_PROMPT}\n\n"
@@ -122,8 +150,12 @@ def generate_response(input_data, chat_history, max_new_tokens, temperature, top
                 elif msg["role"] == "assistant":
                     prompt += f"Assistant: {msg['content']}\n"
             prompt += "Assistant:"
+            print(f"Using fallback format. Prompt length: {len(prompt)}")  # Debug
+
+        print(f"Final prompt: {prompt[:200]}...")  # Debug first 200 chars
 
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        print(f"Tokenized input shape: {inputs.input_ids.shape}")  # Debug
 
         # Use generate with basic, safe parameters
         with torch.no_grad():
@@ -138,18 +170,30 @@ def generate_response(input_data, chat_history, max_new_tokens, temperature, top
                 output_scores=False,
             )
 
+        print(f"Generated sequence shape: {outputs.sequences.shape}")  # Debug
+        print(f"Input length: {inputs.input_ids.shape[1]}")  # Debug
+
         response = tokenizer.decode(outputs.sequences[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
         response = response.strip()
 
+        print(f"Generated response length: {len(response)}")  # Debug
+        print(f"Response preview: {response[:100]}...")  # Debug
+
         if not response:
-            response
+            print("Empty response, using fallback")  # Debug
+            response = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
 
         yield response
 
     except Exception as e:
         error_msg = f"خطأ في التوليد: {str(e)}"
         print(error_msg)
-
+        print(f"Error type: {type(e)}")  # Debug
+        import traceback
+        print("Traceback:")
+        print(traceback.format_exc())  # Debug
+
+        yield "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
 
 demo = gr.ChatInterface(
     fn=generate_response,
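
The slicing in the decode step above, outputs.sequences[0][inputs.input_ids.shape[1]:], is what keeps the reply from echoing the prompt: generate() returns the prompt tokens followed by the newly generated ones, so decoding starts at the prompt length. Below is a minimal sketch of that pattern; the checkpoint name "gpt2" is only a small stand-in, not the Space's actual model or tokenizer.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint for illustration; app.py loads its own model and tokenizer.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "Hello, how can I help you"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=20,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,  # exposes .sequences, as used in the diff
    )

# Skip the prompt tokens so only the newly generated text is decoded.
new_tokens = outputs.sequences[0][inputs.input_ids.shape[1]:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
print(response)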
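The gr.ChatInterface(...) call that closes the diff is truncated here; it registers generate_response as the chat function, and Gradio passes the message, the history, and each entry of additional_inputs to it in order, which matches the signature (input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty). The sliders below are a hypothetical sketch of that wiring; the ranges and defaults are not visible in this commit and are assumed values only.

import gradio as gr

# Assumes generate_response from app.py above is in scope.
demo = gr.ChatInterface(
    fn=generate_response,
    additional_inputs=[
        # Assumed ranges/defaults, not taken from the commit.
        gr.Slider(minimum=64, maximum=2048, value=512, step=1, label="max_new_tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="top_p"),
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="top_k"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="repetition_penalty"),
    ],
)

if __name__ == "__main__":
    demo.launch()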