Instructions to use ssmits/Zamba2-1.2B-instruct-Dutch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ssmits/Zamba2-1.2B-instruct-Dutch with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ssmits/Zamba2-1.2B-instruct-Dutch")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ssmits/Zamba2-1.2B-instruct-Dutch")
model = AutoModelForCausalLM.from_pretrained("ssmits/Zamba2-1.2B-instruct-Dutch")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use ssmits/Zamba2-1.2B-instruct-Dutch with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "ssmits/Zamba2-1.2B-instruct-Dutch"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ssmits/Zamba2-1.2B-instruct-Dutch",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/ssmits/Zamba2-1.2B-instruct-Dutch
- SGLang
How to use ssmits/Zamba2-1.2B-instruct-Dutch with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ssmits/Zamba2-1.2B-instruct-Dutch" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ssmits/Zamba2-1.2B-instruct-Dutch",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ssmits/Zamba2-1.2B-instruct-Dutch" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ssmits/Zamba2-1.2B-instruct-Dutch",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use ssmits/Zamba2-1.2B-instruct-Dutch with Docker Model Runner:
docker model run hf.co/ssmits/Zamba2-1.2B-instruct-Dutch
Upload 2 files
Browse files
- finetune.py +3 -1
- optimize_lr.py +4 -1
finetune.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
CONTEXT_WINDOW = 1024 #has to fit in 4090
|
| 4 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
@@ -10,6 +11,7 @@ from transformers import (
|
|
| 10 |
import torch
|
| 11 |
from datasets import load_dataset
|
| 12 |
from huggingface_hub import login
|
|
|
|
| 13 |
|
| 14 |
# setup tokenizer
|
| 15 |
tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-1.2B-instruct", token=HF_TOKEN)
|
|
@@ -73,7 +75,7 @@ training_args = TrainingArguments(
|
|
| 73 |
save_steps=500,
|
| 74 |
save_total_limit=2,
|
| 75 |
logging_steps=100,
|
| 76 |
-
learning_rate=
|
| 77 |
weight_decay=0.01,
|
| 78 |
fp16=False,
|
| 79 |
bf16=True,
|
|
|
|
| 1 |
import os
|
| 2 |
+
import json
|
| 3 |
|
| 4 |
CONTEXT_WINDOW = 1024 #has to fit in 4090
|
| 5 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
| 11 |
import torch
|
| 12 |
from datasets import load_dataset
|
| 13 |
from huggingface_hub import login
|
| 14 |
+
from optimize_lr import best_lr
|
| 15 |
|
| 16 |
# setup tokenizer
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-1.2B-instruct", token=HF_TOKEN)
|
|
|
|
| 75 |
save_steps=500,
|
| 76 |
save_total_limit=2,
|
| 77 |
logging_steps=100,
|
| 78 |
+
learning_rate=best_lr,
|
| 79 |
weight_decay=0.01,
|
| 80 |
fp16=False,
|
| 81 |
bf16=True,
|
optimize_lr.py
CHANGED
|
@@ -398,4 +398,7 @@ plot_gpr_results(study, final_optimization)
|
|
| 398 |
|
| 399 |
# Save all results
|
| 400 |
with open("lr_optimization_results.json", "w") as f:
|
| 401 |
-
json.dump(results, f, indent=4)
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
|
| 399 |
# Save all results
|
| 400 |
with open("lr_optimization_results.json", "w") as f:
|
| 401 |
+
json.dump(results, f, indent=4)
|
| 402 |
+
|
| 403 |
+
# Store best learning rate as a variable for finetune.py to use
|
| 404 |
+
best_lr = study.best_params["learning_rate"]
|