# Qwen2.5-Math-1.5B-mobile — Verified Usage Examples
# Chat format: chatml
# CPU speed: 15.7 t/s
# Verified: June 2026
#
# Three independent ways to run the model. Each section carries its own
# import so it can be copied out and run on its own.

# === Using dispatchai SDK ===
from dispatchai import load_model

model = load_model("Qwen2.5-Math-1.5B-mobile", backend="gguf")

# Chat
response = model.chat("What is the capital of France?")
print(f"Capital: {response}")

# With system prompt
response = model.chat(
    "Summarize this: The meeting is at 3pm.",
    system="You are a concise assistant.",
)
print(f"Summary: {response}")

# === Using llama-cpp-python directly ===
from llama_cpp import Llama

# chat_format matches the "Chat format: chatml" noted in the header.
# NOTE(review): "model.gguf" looks like a placeholder path — presumably it
# should point at the local GGUF file for this model; confirm before running.
llm = Llama(
    model_path="model.gguf",
    chat_format="chatml",
    n_ctx=512,
    n_threads=4,
    verbose=False,
)
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What is 2+2?"}],
    max_tokens=30,
)
# The reply text is read from the first choice's message content.
print(f"Math: {response['choices'][0]['message']['content']}")

# === Using llama.cpp CLI ===
# NOTE(review): flags kept exactly as originally written; confirm them against
# the installed llama-cli version before relying on this command.
# llama-cli -m model.gguf -p "Hello!" -n 30 -t 4 -st --chat-format chatml