Instructions to use ryefoxlime/TADBot with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ryefoxlime/TADBot with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ryefoxlime/TADBot")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("ryefoxlime/TADBot", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use ryefoxlime/TADBot with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ryefoxlime/TADBot"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/ryefoxlime/TADBot
- SGLang
How to use ryefoxlime/TADBot with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ryefoxlime/TADBot" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ryefoxlime/TADBot" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ryefoxlime/TADBot",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use ryefoxlime/TADBot with Docker Model Runner:
docker model run hf.co/ryefoxlime/TADBot
Commit ·
d1734fa
1
Parent(s): 313f14f
Full Fine Tuned Model
Browse files
- Gemma2_2B/inference.ipynb +25 -6
Gemma2_2B/inference.ipynb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
@@ -291,13 +291,12 @@
|
|
| 291 |
"metadata": {},
|
| 292 |
"outputs": [],
|
| 293 |
"source": [
|
|
|
|
| 294 |
"from peft import PeftModel\n",
|
| 295 |
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 296 |
"\n",
|
| 297 |
"# Load the base model and tokenizer\n",
|
| 298 |
"model_name = \"google/gemma-2-2b-it\"\n",
|
| 299 |
-
"device_map = {\"\": 0} # Use GPU 0 for the model\n",
|
| 300 |
-
"\n",
|
| 301 |
"# Load the fine-tuned model\n",
|
| 302 |
"new_model = \"gemma-2-2b-ft/\" # Replace with the path to your fine-tuned model"
|
| 303 |
]
|
|
@@ -310,7 +309,7 @@
|
|
| 310 |
{
|
| 311 |
"data": {
|
| 312 |
"application/vnd.jupyter.widget-view+json": {
|
| 313 |
-
"model_id": "
|
| 314 |
"version_major": 2,
|
| 315 |
"version_minor": 0
|
| 316 |
},
|
|
@@ -320,18 +319,38 @@
|
|
| 320 |
},
|
| 321 |
"metadata": {},
|
| 322 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
}
|
| 324 |
],
|
| 325 |
"source": [
|
| 326 |
"base_model = AutoModelForCausalLM.from_pretrained(\n",
|
| 327 |
-
" model_name,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
"model = PeftModel.from_pretrained(base_model, new_model, cache_dir = \".cache/\")\n",
|
| 329 |
"model = model.merge_and_unload()\n",
|
|
|
|
| 330 |
"\n",
|
| 331 |
"# Reload tokenizer to save it\n",
|
| 332 |
"tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir = \".cache/\")\n",
|
| 333 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
| 334 |
-
"tokenizer.padding_side = \"right\"\n"
|
|
|
|
| 335 |
]
|
| 336 |
},
|
| 337 |
{
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 6,
|
| 6 |
"metadata": {},
|
| 7 |
"outputs": [],
|
| 8 |
"source": [
|
|
|
|
| 291 |
"metadata": {},
|
| 292 |
"outputs": [],
|
| 293 |
"source": [
|
| 294 |
+
"import torch\n",
|
| 295 |
"from peft import PeftModel\n",
|
| 296 |
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
|
| 297 |
"\n",
|
| 298 |
"# Load the base model and tokenizer\n",
|
| 299 |
"model_name = \"google/gemma-2-2b-it\"\n",
|
|
|
|
|
|
|
| 300 |
"# Load the fine-tuned model\n",
|
| 301 |
"new_model = \"gemma-2-2b-ft/\" # Replace with the path to your fine-tuned model"
|
| 302 |
]
|
|
|
|
| 309 |
{
|
| 310 |
"data": {
|
| 311 |
"application/vnd.jupyter.widget-view+json": {
|
| 312 |
+
"model_id": "21f72716997c42cfa2244677b36b85f8",
|
| 313 |
"version_major": 2,
|
| 314 |
"version_minor": 0
|
| 315 |
},
|
|
|
|
| 319 |
},
|
| 320 |
"metadata": {},
|
| 321 |
"output_type": "display_data"
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"data": {
|
| 325 |
+
"text/plain": [
|
| 326 |
+
"('gemma2-TADBot\\\\tokenizer_config.json',\n",
|
| 327 |
+
" 'gemma2-TADBot\\\\special_tokens_map.json',\n",
|
| 328 |
+
" 'gemma2-TADBot\\\\tokenizer.json')"
|
| 329 |
+
]
|
| 330 |
+
},
|
| 331 |
+
"execution_count": 2,
|
| 332 |
+
"metadata": {},
|
| 333 |
+
"output_type": "execute_result"
|
| 334 |
}
|
| 335 |
],
|
| 336 |
"source": [
|
| 337 |
"base_model = AutoModelForCausalLM.from_pretrained(\n",
|
| 338 |
+
" model_name,\n",
|
| 339 |
+
" low_cpu_mem_usage=True,\n",
|
| 340 |
+
" return_dict=True,\n",
|
| 341 |
+
" torch_dtype=torch.float16,\n",
|
| 342 |
+
" device_map=\"cpu\",\n",
|
| 343 |
+
" cache_dir=\".cache/\"\n",
|
| 344 |
+
")\n",
|
| 345 |
"model = PeftModel.from_pretrained(base_model, new_model, cache_dir = \".cache/\")\n",
|
| 346 |
"model = model.merge_and_unload()\n",
|
| 347 |
+
"model.save_pretrained(\"gemma2-TADBot\")\n",
|
| 348 |
"\n",
|
| 349 |
"# Reload tokenizer to save it\n",
|
| 350 |
"tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir = \".cache/\")\n",
|
| 351 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
| 352 |
+
"tokenizer.padding_side = \"right\"\n",
|
| 353 |
+
"tokenizer.save_pretrained(\"gemma2-TADBot\")"
|
| 354 |
]
|
| 355 |
},
|
| 356 |
{
|