3morixd committed on
Commit
6d026a9
·
verified ·
1 Parent(s): 6e38aa2

Upload usage_examples.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. usage_examples.py +29 -0
usage_examples.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen2.5-Coder-7B-mobile — Verified Usage Examples
2
+ # Chat format: chatml
3
+ # CPU speed: 3.0 t/s
4
+ # Verified: June 2026
5
+
6
+ # === Using dispatchai SDK ===
7
+ from dispatchai import load_model
8
+ model = load_model("Qwen2.5-Coder-7B-mobile", backend="gguf")
9
+
10
+ # Chat
11
+ response = model.chat("What is the capital of France?")
12
+ print(f"Capital: {response}")
13
+
14
+ # With system prompt
15
+ response = model.chat("Summarize this: The meeting is at 3pm.", system="You are a concise assistant.")
16
+ print(f"Summary: {response}")
17
+
18
+ # === Using llama-cpp-python directly ===
19
+ from llama_cpp import Llama
20
+ llm = Llama(model_path="model.gguf", chat_format="chatml", n_ctx=512, n_threads=4, verbose=False)
21
+
22
+ response = llm.create_chat_completion(
23
+ messages=[{"role": "user", "content": "What is 2+2?"}],
24
+ max_tokens=30,
25
+ )
26
+ print(f"Math: {response['choices'][0]['message']['content']}")
27
+
28
+ # === Using llama.cpp CLI ===
29
+ # llama-cli -m model.gguf -p "Hello!" -n 30 -t 4 -st --chat-template chatml