File size: 967 Bytes
4faa5e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Llama-3.2-1B-Instruct-mobile — Verified Usage Examples
# Chat format: llama-3
# CPU speed: 4.9 t/s
# Verified: June 2026

# === Using dispatchai SDK ===
from dispatchai import load_model

# Load the model by name, selecting the "gguf" backend.
model = load_model("Llama-3.2-1B-Instruct-mobile", backend="gguf")

# Chat: a single user prompt, no system prompt.
question = "What is the capital of France?"
response = model.chat(question)
print(f"Capital: {response}")

# With system prompt: the same call, plus a `system` keyword argument.
system_prompt = "You are a concise assistant."
summary_request = "Summarize this: The meeting is at 3pm."
response = model.chat(summary_request, system=system_prompt)
print(f"Summary: {response}")

# === Using llama-cpp-python directly ===
from llama_cpp import Llama

# Open the local GGUF file with the llama-3 chat format; one option per line
# so each setting is easy to tweak.
llm = Llama(
    model_path="model.gguf",
    chat_format="llama-3",
    n_ctx=512,
    n_threads=4,
    verbose=False,
)

# A single-turn conversation: one user message, reply capped at 30 tokens.
messages = [{"role": "user", "content": "What is 2+2?"}]
response = llm.create_chat_completion(messages=messages, max_tokens=30)

# The reply text is the content of the first choice's message.
answer = response["choices"][0]["message"]["content"]
print(f"Math: {answer}")

# === Using llama.cpp CLI ===
# llama-cli -m model.gguf -p "Hello!" -n 30 -t 4 -st --chat-template llama3