anaspro committed · commit 24d5388 · parent 79983eb
Commit message: updatE
app.py CHANGED

@@ -27,12 +27,12 @@ tokenizer = AutoTokenizer.from_pretrained(
 
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
-    device_map=
+    device_map=None,  # remove device_map to avoid problems with past_key_values
     trust_remote_code=True,
     token=hf_token,
     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
     low_cpu_mem_usage=True
-)
+).to(device)  # move the model to the specified device
 print("Model loaded successfully!")
 
 if tokenizer.pad_token is None:

@@ -46,6 +46,7 @@ def get_response(text, tokenizer=tokenizer, model=model):
     generate_ids = model.generate(
         input_ids,
         attention_mask=attention_mask,
+        past_key_values=None,  # pass past_key_values explicitly to avoid errors
         top_p=0.8,
         temperature=0.2,
         max_length=input_len + 256,  # Limit response length to prevent multiple responses