anaspro
committed on
Commit
·
b5d6ec2
1
Parent(s):
afb7b1a
update
Browse files
app.py
CHANGED
|
@@ -30,13 +30,7 @@ pipeline_model = pipeline(
|
|
| 30 |
device=0, # Use GPU device directly
|
| 31 |
token=hf_token,
|
| 32 |
trust_remote_code=True,
|
| 33 |
-
|
| 34 |
-
"torch_dtype": torch.bfloat16, # ✅ فقط هنا
|
| 35 |
-
"load_in_4bit": True,
|
| 36 |
-
"bnb_4bit_compute_dtype": torch.bfloat16,
|
| 37 |
-
"bnb_4bit_use_double_quant": False,
|
| 38 |
-
"bnb_4bit_quant_type": "nf4",
|
| 39 |
-
}
|
| 40 |
)
|
| 41 |
|
| 42 |
def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
|
|
|
|
| 30 |
device=0, # Use GPU device directly
|
| 31 |
token=hf_token,
|
| 32 |
trust_remote_code=True,
|
| 33 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
)
|
| 35 |
|
| 36 |
def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
|