# Source: zindango-slm repository, scripts/test_zindango_gguf.py
# (added by ksjpswaroop, commit 7ce0644)
#!/usr/bin/env python3
"""
Test zindango-slm: GGUF (llama-cpp-python) or HF (transformers) fallback.
Runs a single prompt to verify the model loads and generates.
"""
import os
import sys
def test_gguf(gguf_path: str) -> bool:
"""Test via llama-cpp-python if available."""
try:
from llama_cpp import Llama
except ImportError:
return False
print("Loading zindango-slm (GGUF) with llama-cpp-python...")
llm = Llama(
model_path=gguf_path,
n_ctx=512,
n_threads=os.cpu_count() or 4,
chat_format="chatml",
verbose=False,
)
messages = [
{"role": "system", "content": "You are a helpful assistant. Reply briefly."},
{"role": "user", "content": "Who are you? One sentence only."},
]
out = llm.create_chat_completion(messages=messages, max_tokens=64, temperature=0.7)
reply = out["choices"][0]["message"]["content"]
print("Reply:", reply)
return bool(reply.strip())
def test_transformers(local_path: str | None = None) -> bool:
"""Test via transformers (HF model) as fallback when GGUF/llama.cpp unavailable."""
try:
from transformers import AutoModelForCausalLM, AutoTokenizer
except ImportError:
print("transformers not installed: pip install transformers torch")
return False
model_id = local_path if local_path and os.path.isdir(local_path) else "ksjpswaroop/zindango-slm"
print(f"Testing zindango-slm (transformers) - fallback when llama-cpp unavailable...")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_id,
trust_remote_code=True,
torch_dtype="auto",
low_cpu_mem_usage=True,
)
messages = [{"role": "user", "content": "Who are you? One sentence only."}]
text = tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
inputs = tokenizer(text, return_tensors="pt")
out = model.generate(
**inputs, max_new_tokens=64, pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
)
reply = tokenizer.decode(
out[0][inputs["input_ids"].shape[1] :], skip_special_tokens=True
)
print("Reply:", reply)
return bool(reply.strip())
def main() -> int:
    """Try each local GGUF candidate, then the transformers fallback.

    Model files are looked up relative to the project root (the parent
    of this script's directory): ``models/zindango-slm/*.gguf`` for GGUF
    and ``models/zindango-slm-hf`` for a local HF checkout.

    Returns:
        0 when any backend produced a reply, 1 otherwise (for sys.exit).
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    model_dir = os.path.join(project_root, "models", "zindango-slm")
    # Prefer Q8_0, then f16.  Try every existing candidate in order so a
    # corrupt Q8_0 file does not mask a working f16 file.
    for name in ("zindango-slm-Q8_0.gguf", "zindango-slm-f16.gguf"):
        gguf_path = os.path.join(model_dir, name)
        if os.path.isfile(gguf_path):
            print(f"Trying GGUF: {gguf_path}")
            if test_gguf(gguf_path):
                print("\n[OK] zindango-slm GGUF test passed.")
                return 0
    print("\nllama-cpp-python unavailable or failed. Using transformers fallback...")
    local_hf = os.path.join(project_root, "models", "zindango-slm-hf")
    if test_transformers(local_hf):
        print("\n[OK] zindango-slm transformers test passed.")
        return 0
    print("\n[FAIL] No working backend. Install: pip install transformers torch")
    print("For GGUF: pip install llama-cpp-python")
    return 1
if __name__ == "__main__":
    # Propagate main()'s 0/1 status as the process exit code.
    sys.exit(main())