Spaces:

Deva1211
/

chatbot

Sleeping

App Files Files Community

chatbot / test_model.py

Deva1211

Switched to resolving issues

c422049 8 months ago

raw

history blame

3.09 kB

	#!/usr/bin/env python3
	"""
	Test script to validate Mistral-7B-Instruct AWQ model response generation
	"""

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	def test_model():
	print("Loading Mistral-7B-Instruct AWQ for testing...")

	# Try AWQ model first, fallback to regular model if needed
	try:
	tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-AWQ")
	model = AutoModelForCausalLM.from_pretrained(
	"TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
	device_map="auto",
	torch_dtype=torch.float16,
	low_cpu_mem_usage=True
	)
	print("✅ AWQ model loaded successfully!")
	except Exception as e:
	print(f"⚠️ AWQ model failed to load: {e}")
	print("📦 Falling back to regular Mistral-7B-Instruct-v0.2...")
	tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
	model = AutoModelForCausalLM.from_pretrained(
	"mistralai/Mistral-7B-Instruct-v0.2",
	device_map="auto",
	torch_dtype=torch.float16,
	low_cpu_mem_usage=True
	)
	print("✅ Regular model loaded successfully!")

	if tokenizer.pad_token is None:
	tokenizer.pad_token = tokenizer.eos_token

	print("Model loaded successfully!")

	# Test conversation
	test_messages = [
	"I feel sad today",
	"What should I do?",
	"Hello"
	]

	for i, message in enumerate(test_messages):
	print(f"\n--- Test {i+1}: '{message}' ---")

	# Use Mistral chat template format
	messages = [
	{"role": "user", "content": message}
	]

	# Apply chat template
	conversation = tokenizer.apply_chat_template(
	messages,
	tokenize=False,
	add_generation_prompt=True
	)

	input_ids = tokenizer.encode(conversation, return_tensors="pt")

	# Generate response with settings optimized for Mistral AWQ
	with torch.no_grad():
	chat_history_ids = model.generate(
	input_ids.to(model.device),
	max_new_tokens=100,
	no_repeat_ngram_size=2,
	do_sample=True,
	pad_token_id=tokenizer.pad_token_id,
	eos_token_id=tokenizer.eos_token_id,
	temperature=0.9,
	top_k=50,
	top_p=0.9,
	use_cache=True
	)

	# Decode response
	response = tokenizer.decode(
	chat_history_ids[:, input_ids.shape[-1]:][0],
	skip_special_tokens=True
	).strip()

	print(f"Raw response: '{response}'")
	print(f"Response length: {len(response)} characters")

	if len(response) > 1:
	print("✅ Good response generated")
	else:
	print("⚠️ Short/empty response")

	print("\n✅ Model testing complete!")

	if __name__ == "__main__":
	test_model()