#!/usr/bin/env python3
"""
Test zindango-slm: GGUF (llama-cpp-python) or HF (transformers) fallback.
Runs a single prompt to verify the model loads and generates.
"""
import os
import sys


def test_gguf(gguf_path: str) -> bool:
    """Test via llama-cpp-python if available."""
    try:
        from llama_cpp import Llama
    except ImportError:
        return False

    print("Loading zindango-slm (GGUF) with llama-cpp-python...")
    llm = Llama(
        model_path=gguf_path,
        n_ctx=512,
        n_threads=os.cpu_count() or 4,
        chat_format="chatml",
        verbose=False,
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Reply briefly."},
        {"role": "user", "content": "Who are you? One sentence only."},
    ]
    out = llm.create_chat_completion(messages=messages, max_tokens=64, temperature=0.7)
    reply = out["choices"][0]["message"]["content"]
    print("Reply:", reply)
    return bool(reply.strip())


def test_transformers(local_path: str | None = None) -> bool:
    """Test via transformers (HF model) as fallback when GGUF/llama.cpp unavailable."""
    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer
    except ImportError:
        print("transformers not installed: pip install transformers torch")
        return False

    model_id = local_path if local_path and os.path.isdir(local_path) else "ksjpswaroop/zindango-slm"
    print("Testing zindango-slm (transformers) - fallback when llama-cpp unavailable...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype="auto",
        low_cpu_mem_usage=True,
    )
    messages = [{"role": "user", "content": "Who are you? One sentence only."}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt")
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
    )
    reply = tokenizer.decode(
        out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    print("Reply:", reply)
    return bool(reply.strip())


def main():
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    model_dir = os.path.join(project_root, "models", "zindango-slm")

    # Prefer Q8_0, then f16
    for name in ("zindango-slm-Q8_0.gguf", "zindango-slm-f16.gguf"):
        gguf_path = os.path.join(model_dir, name)
        if os.path.isfile(gguf_path):
            print(f"Trying GGUF: {gguf_path}")
            if test_gguf(gguf_path):
                print("\n[OK] zindango-slm GGUF test passed.")
                return 0
            break

    print("\nllama-cpp-python unavailable or failed. Using transformers fallback...")
    local_hf = os.path.join(project_root, "models", "zindango-slm-hf")
    if test_transformers(local_hf):
        print("\n[OK] zindango-slm transformers test passed.")
        return 0

    print("\n[FAIL] No working backend. Install: pip install transformers torch")
    print("For GGUF: pip install llama-cpp-python")
    return 1


if __name__ == "__main__":
    sys.exit(main())