anaspro committed on
Commit bd45f32 · 1 Parent(s): da09e0f
Files changed (1)
  1. app.py +41 -26
app.py CHANGED
@@ -108,34 +108,49 @@ def generate_response(input_data, chat_history, max_new_tokens, temperature, top
     # Add current user message
     messages.append({"role": "user", "content": input_data})
 
-    # Validate the values so CUDA errors don't occur
-    temperature = max(0.1, min(2.0, temperature))  # 0.1 to 2.0
-    top_p = max(0.1, min(1.0, top_p))  # 0.1 to 1.0
-    top_k = max(1, min(100, top_k))  # 1 to 100
-    repetition_penalty = max(1.0, min(1.5, repetition_penalty))  # 1.0 to 1.5
-
-    # Use the custom ChatPipeline with streaming
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-    generation_kwargs = pipe(
-        messages,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=repetition_penalty
-    )
-
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    # Stream the response
-    response = ""
-    for chunk in streamer:
-        response += chunk
+    # Use generate directly with safer parameters
+    try:
+        # Try to use the chat template
+        if hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template is not None:
+            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        else:
+            # Fallback format
+            prompt = f"System: {DEFAULT_SYSTEM_PROMPT}\n\n"
+            for msg in messages[1:]:  # Skip system message since we added it above
+                if msg["role"] == "user":
+                    prompt += f"Human: {msg['content']}\n"
+                elif msg["role"] == "assistant":
+                    prompt += f"Assistant: {msg['content']}\n"
+            prompt += "Assistant:"
+
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        # Use generate with basic, safe parameters
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=min(max_new_tokens, 512),  # safety cap
+                do_sample=False,  # disable sampling for safety
+                num_beams=1,  # greedy decoding
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                return_dict_in_generate=True,
+                output_scores=False,
+            )
+
+        response = tokenizer.decode(outputs.sequences[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+        response = response.strip()
+
+        if not response:
+            response = "آسف، حدث خطأ في توليد الرد. حاول مرة ثانية."
+
         yield response
 
+    except Exception as e:
+        error_msg = f"خطأ في التوليد: {str(e)}"
+        print(error_msg)
+        yield "آسف، حدث خطأ تقني. حاول مرة ثانية."
+
 demo = gr.ChatInterface(
     fn=generate_response,
     additional_inputs=[
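
Note: the removed streaming path built generation_kwargs from the return value of pipe(messages, ...) and handed that to model.generate, which is a likely source of the errors this commit works around. A minimal sketch of the pattern that code was aiming for, assuming model and tokenizer are loaded as elsewhere in app.py (the function name and default values below are illustrative, not from the commit):

# Sketch: the streaming pattern the removed code was aiming for. `model` and
# `tokenizer` are assumed to be loaded as elsewhere in app.py; defaults here
# are illustrative.
from threading import Thread

from transformers import TextIteratorStreamer


def stream_response(messages, max_new_tokens=256, temperature=0.7,
                    top_p=0.9, top_k=50, repetition_penalty=1.1):
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True)

    # model.generate expects a plain dict of keyword arguments here; the
    # removed code passed the result of calling pipe(...) instead.
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )

    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    response = ""
    for chunk in streamer:
        response += chunk
        yield response  # Gradio re-renders the growing reply on each yield
    thread.join()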
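
If sampling is re-enabled later, the parameter clamping this commit removes can be kept alongside the new direct model.generate path. A sketch under the same assumptions (model and tokenizer loaded as in app.py; the prompt is built as in the added code):

# Sketch: the removed clamping combined with the new direct-generate path,
# with sampling turned back on. `model` and `tokenizer` are assumed loaded
# as in app.py.
import torch


def generate_sampled(prompt, max_new_tokens, temperature, top_p, top_k,
                     repetition_penalty):
    # Clamp to the same safe ranges the removed code enforced
    temperature = max(0.1, min(2.0, temperature))
    top_p = max(0.1, min(1.0, top_p))
    top_k = max(1, min(100, top_k))
    repetition_penalty = max(1.0, min(1.5, repetition_penalty))

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=min(max_new_tokens, 512),  # same safety cap as the commit
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, as the commit does
    return tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()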