"""
Test zindango-slm: GGUF (llama-cpp-python) or HF (transformers) fallback.
Runs a single prompt to verify the model loads and generates.
"""
| | import os |
| | import sys |
| |
|
| |
|
| | def test_gguf(gguf_path: str) -> bool: |
| | """Test via llama-cpp-python if available.""" |
| | try: |
| | from llama_cpp import Llama |
| | except ImportError: |
| | return False |
| |
|
| | print("Loading zindango-slm (GGUF) with llama-cpp-python...") |
| | llm = Llama( |
| | model_path=gguf_path, |
| | n_ctx=512, |
| | n_threads=os.cpu_count() or 4, |
| | chat_format="chatml", |
| | verbose=False, |
| | ) |
| | messages = [ |
| | {"role": "system", "content": "You are a helpful assistant. Reply briefly."}, |
| | {"role": "user", "content": "Who are you? One sentence only."}, |
| | ] |
| | out = llm.create_chat_completion(messages=messages, max_tokens=64, temperature=0.7) |
| | reply = out["choices"][0]["message"]["content"] |
| | print("Reply:", reply) |
| | return bool(reply.strip()) |
| |
|
| |
|
| | def test_transformers(local_path: str | None = None) -> bool: |
| | """Test via transformers (HF model) as fallback when GGUF/llama.cpp unavailable.""" |
| | try: |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | except ImportError: |
| | print("transformers not installed: pip install transformers torch") |
| | return False |
| |
|
| | model_id = local_path if local_path and os.path.isdir(local_path) else "ksjpswaroop/zindango-slm" |
| | print(f"Testing zindango-slm (transformers) - fallback when llama-cpp unavailable...") |
| | tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) |
| | model = AutoModelForCausalLM.from_pretrained( |
| | model_id, |
| | trust_remote_code=True, |
| | torch_dtype="auto", |
| | low_cpu_mem_usage=True, |
| | ) |
| | messages = [{"role": "user", "content": "Who are you? One sentence only."}] |
| | text = tokenizer.apply_chat_template( |
| | messages, tokenize=False, add_generation_prompt=True |
| | ) |
| | inputs = tokenizer(text, return_tensors="pt") |
| | out = model.generate( |
| | **inputs, max_new_tokens=64, pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id |
| | ) |
| | reply = tokenizer.decode( |
| | out[0][inputs["input_ids"].shape[1] :], skip_special_tokens=True |
| | ) |
| | print("Reply:", reply) |
| | return bool(reply.strip()) |
| |
|
| |
|
def main() -> int:
    """Try the GGUF backend first, then the transformers fallback.

    Returns:
        Process exit code: 0 when either backend passes, 1 when neither works.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    model_dir = os.path.join(project_root, "models", "zindango-slm")

    # Prefer the smaller quantized file; try every candidate that exists
    # (previously a `break` stopped after the first existing file even when
    # it failed, so the f16 model was never attempted).
    for name in ("zindango-slm-Q8_0.gguf", "zindango-slm-f16.gguf"):
        gguf_path = os.path.join(model_dir, name)
        if os.path.isfile(gguf_path):
            print(f"Trying GGUF: {gguf_path}")
            if test_gguf(gguf_path):
                print("\n[OK] zindango-slm GGUF test passed.")
                return 0

    print("\nllama-cpp-python unavailable or failed. Using transformers fallback...")
    local_hf = os.path.join(project_root, "models", "zindango-slm-hf")
    if test_transformers(local_hf):
        print("\n[OK] zindango-slm transformers test passed.")
        return 0

    print("\n[FAIL] No working backend. Install: pip install transformers torch")
    print("For GGUF: pip install llama-cpp-python")
    return 1
| |
|
| |
|
# Script entry point: propagate main()'s status (0 = pass, 1 = no backend).
if __name__ == "__main__":
    sys.exit(main())
| |
|