anaspro
committed on
Commit
·
afb7b1a
1
Parent(s):
8038e28
update
Browse files
app.py
CHANGED
|
@@ -23,17 +23,15 @@ model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
|
|
| 23 |
# إذا كان فيه HF_TOKEN في البيئة
|
| 24 |
hf_token = os.getenv("HF_TOKEN")
|
| 25 |
|
| 26 |
-
# Initialize pipeline for chat
|
| 27 |
-
# ✅ استخدم image-text-to-text حتى لو text-only
|
| 28 |
pipeline_model = pipeline(
|
| 29 |
-
"image-text-to-text",
|
| 30 |
model=model_path,
|
| 31 |
device=0, # Use GPU device directly
|
| 32 |
-
torch_dtype=torch.bfloat16,
|
| 33 |
token=hf_token,
|
| 34 |
trust_remote_code=True,
|
| 35 |
model_kwargs={
|
| 36 |
-
"torch_dtype": torch.bfloat16,
|
| 37 |
"load_in_4bit": True,
|
| 38 |
"bnb_4bit_compute_dtype": torch.bfloat16,
|
| 39 |
"bnb_4bit_use_double_quant": False,
|
|
@@ -114,8 +112,6 @@ def generate_response(message, history, max_new_tokens, temperature, top_p, top_
|
|
| 114 |
|
| 115 |
# Debug: print messages structure
|
| 116 |
print(f"Messages sent to model: {len(messages)} messages")
|
| 117 |
-
for i, msg in enumerate(messages):
|
| 118 |
-
print(f" {i}: {msg['role']}: {msg['content'][:50]}...")
|
| 119 |
|
| 120 |
# Generate response
|
| 121 |
response = generate_with_pipeline(
|
|
@@ -144,10 +140,10 @@ demo = gr.ChatInterface(
|
|
| 144 |
fn=generate_response,
|
| 145 |
additional_inputs=[
|
| 146 |
gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
|
| 147 |
-
gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
|
| 148 |
gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
|
| 149 |
-
gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),
|
| 150 |
-
gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
|
| 151 |
],
|
| 152 |
examples=[
|
| 153 |
["النت عندي معطل من الصبح، تقدر تساعدني؟"],
|
|
|
|
| 23 |
# إذا كان فيه HF_TOKEN في البيئة
|
| 24 |
hf_token = os.getenv("HF_TOKEN")
|
| 25 |
|
| 26 |
+
# ✅ Initialize pipeline for chat - إصلاح مشكلة torch_dtype
|
|
|
|
| 27 |
pipeline_model = pipeline(
|
| 28 |
+
"image-text-to-text",
|
| 29 |
model=model_path,
|
| 30 |
device=0, # Use GPU device directly
|
|
|
|
| 31 |
token=hf_token,
|
| 32 |
trust_remote_code=True,
|
| 33 |
model_kwargs={
|
| 34 |
+
"torch_dtype": torch.bfloat16, # ✅ فقط هنا
|
| 35 |
"load_in_4bit": True,
|
| 36 |
"bnb_4bit_compute_dtype": torch.bfloat16,
|
| 37 |
"bnb_4bit_use_double_quant": False,
|
|
|
|
| 112 |
|
| 113 |
# Debug: print messages structure
|
| 114 |
print(f"Messages sent to model: {len(messages)} messages")
|
|
|
|
|
|
|
| 115 |
|
| 116 |
# Generate response
|
| 117 |
response = generate_with_pipeline(
|
|
|
|
| 140 |
fn=generate_response,
|
| 141 |
additional_inputs=[
|
| 142 |
gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=2048),
|
| 143 |
+
gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
|
| 144 |
gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.95),
|
| 145 |
+
gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=64),
|
| 146 |
+
gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.0)
|
| 147 |
],
|
| 148 |
examples=[
|
| 149 |
["النت عندي معطل من الصبح، تقدر تساعدني؟"],
|