"""
Test zindango-slm: GGUF (llama-cpp-python) or HF (transformers) fallback.
Runs a single prompt to verify the model loads and generates.
"""
| | import os |
| | import sys |
| |
|
| |
|
| | def test_gguf(gguf_path: str) -> bool: |
| | """Test via llama-cpp-python if available.""" |
| | try: |
| | from llama_cpp import Llama |
| | except ImportError: |
| | return False |
| |
|
| | print("Loading zindango-slm (GGUF) with llama-cpp-python...") |
| | llm = Llama( |
| | model_path=gguf_path, |
| | n_ctx=512, |
| | n_threads=os.cpu_count() or 4, |
| | chat_format="chatml", |
| | verbose=False, |
| | ) |
| | messages = [ |
| | {"role": "system", "content": "You are a helpful assistant. Reply briefly."}, |
| | {"role": "user", "content": "Who are you? One sentence only."}, |
| | ] |
| | out = llm.create_chat_completion(messages=messages, max_tokens=64, temperature=0.7) |
| | reply = out["choices"][0]["message"]["content"] |
| | print("Reply:", reply) |
| | return bool(reply.strip()) |
| |
|
| |
|
| | def test_transformers(local_path: str | None = None) -> bool: |
| | """Test via transformers (HF model) as fallback when GGUF/llama.cpp unavailable.""" |
| | try: |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | except ImportError: |
| | print("transformers not installed: pip install transformers torch") |
| | return False |
| |
|
| | model_id = local_path if local_path and os.path.isdir(local_path) else "ksjpswaroop/zindango-slm" |
| | print(f"Testing zindango-slm (transformers) - fallback when llama-cpp unavailable...") |
| | tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) |
| | model = AutoModelForCausalLM.from_pretrained( |
| | model_id, |
| | trust_remote_code=True, |
| | torch_dtype="auto", |
| | low_cpu_mem_usage=True, |
| | ) |
| | messages = [{"role": "user", "content": "Who are you? One sentence only."}] |
| | text = tokenizer.apply_chat_template( |
| | messages, tokenize=False, add_generation_prompt=True |
| | ) |
| | inputs = tokenizer(text, return_tensors="pt") |
| | out = model.generate( |
| | **inputs, max_new_tokens=64, pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id |
| | ) |
| | reply = tokenizer.decode( |
| | out[0][inputs["input_ids"].shape[1] :], skip_special_tokens=True |
| | ) |
| | print("Reply:", reply) |
| | return bool(reply.strip()) |
| |
|
| |
|
def main() -> int:
    """Try the GGUF backend first, then the transformers fallback.

    Returns:
        Process exit code: 0 when either backend passes, 1 when neither works.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    model_dir = os.path.join(project_root, "models", "zindango-slm")

    # Prefer the smaller quantized file; try every candidate that exists
    # (previously a `break` stopped after the first existing file even when
    # it failed, so the f16 model was never attempted).
    for name in ("zindango-slm-Q8_0.gguf", "zindango-slm-f16.gguf"):
        gguf_path = os.path.join(model_dir, name)
        if os.path.isfile(gguf_path):
            print(f"Trying GGUF: {gguf_path}")
            if test_gguf(gguf_path):
                print("\n[OK] zindango-slm GGUF test passed.")
                return 0

    print("\nllama-cpp-python unavailable or failed. Using transformers fallback...")
    local_hf = os.path.join(project_root, "models", "zindango-slm-hf")
    if test_transformers(local_hf):
        print("\n[OK] zindango-slm transformers test passed.")
        return 0

    print("\n[FAIL] No working backend. Install: pip install transformers torch")
    print("For GGUF: pip install llama-cpp-python")
    return 1
| |
|
| |
|
# Script entry point: propagate main()'s status (0 = pass, 1 = no backend).
if __name__ == "__main__":
    sys.exit(main())
| |
|