chatbox / test_model.py
anaspro
update
154d3ef
raw
history blame
1.75 kB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import torch
import transformers
from transformers import pipeline
model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"

# Pick up a Hugging Face token from the environment if one is set
# (needed for gated repos such as the Gemma family).
hf_token = os.getenv("HF_TOKEN")

print("Loading model...")
try:
    # Prefer the first GPU when available; fall back to CPU (-1) instead of
    # crashing on CUDA-less machines (the old hard-coded device=0 required a GPU).
    device = 0 if torch.cuda.is_available() else -1

    # torch_dtype must be given EITHER as a top-level pipeline kwarg OR inside
    # model_kwargs — passing it in both places (as the original did) makes
    # recent transformers raise ValueError for the duplicated argument.
    #
    # NOTE(review): the checkpoint name ends in "-bnb-4bit", i.e. it ships
    # pre-quantized with its bitsandbytes config embedded in config.json, so
    # the manual load_in_4bit / bnb_4bit_* model_kwargs were redundant (and are
    # a deprecated raw-kwarg form in recent transformers) — confirm against the
    # model repo if quantization behavior looks wrong.
    pipeline_model = pipeline(
        "text-generation",
        model=model_path,
        device=device,
        torch_dtype=torch.bfloat16,
        token=hf_token,
        trust_remote_code=True,
    )
    print("Model loaded successfully!")

    # Smoke-test generation with a minimal two-turn chat.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    print("Testing generation...")

    # Chat-tuned models expect the chat template to be applied;
    # add_generation_prompt appends the assistant-turn marker so the model
    # continues as the assistant.
    prompt = pipeline_model.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    outputs = pipeline_model(
        prompt,
        max_new_tokens=50,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        return_full_text=False,  # return only the newly generated text, not the prompt
    )
    response = outputs[0]["generated_text"]
    print(f"Test response: {response}")
    print("✅ Model test successful!")
except Exception as e:
    # Top-level boundary of a standalone smoke test: report the failure and
    # dump the traceback rather than exiting silently.
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()