import os

import torch
from transformers import BitsAndBytesConfig, pipeline
# Pre-quantized (bitsandbytes 4-bit) Gemma 3 4B instruct checkpoint from Unsloth.
model_path = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"

# Hugging Face access token from the environment (only needed for gated checkpoints).
hf_token = os.getenv("HF_TOKEN")

print("Loading model...")
try:
    # The checkpoint is already stored in bitsandbytes 4-bit format; passing an
    # explicit BitsAndBytesConfig keeps the intended quantization settings visible.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
    )

    pipeline_model = pipeline(
        "text-generation",
        model=model_path,
        # Quantized weights are placed by accelerate, so use device_map
        # rather than pinning the pipeline to a single GPU with device=0.
        device_map="auto",
        torch_dtype=torch.bfloat16,
        token=hf_token,
        trust_remote_code=True,
        model_kwargs={"quantization_config": bnb_config},
    )

    print("Model loaded successfully!")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]

    print("Testing generation...")
    # Render the conversation into a single prompt string via the model's chat template.
    prompt = pipeline_model.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    outputs = pipeline_model(
        prompt,
        max_new_tokens=50,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        return_full_text=False,  # return only the newly generated text, not the prompt
    )
    response = outputs[0]["generated_text"]
    print(f"Test response: {response}")
    print("✅ Model test successful!")

except Exception as e:
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()