Update README.md

README.md CHANGED

@@ -50,7 +50,7 @@ Refer to the snippet below to run H1 models using 🤗 transformers:
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model_id = "tiiuae/Falcon-
+model_id = "tiiuae/Falcon-H1-Tiny-R-0.6B"
 
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -64,7 +64,7 @@ model = AutoModelForCausalLM.from_pretrained(
 or
 
 ```bash
-transformers serve tiiuae/Falcon-
+transformers serve tiiuae/Falcon-H1-Tiny-R-0.6B
 ```
 
 ### `llama.cpp`
@@ -74,14 +74,14 @@ You can find all GGUF files compatible with `llama.cpp` under [our official coll
 ```bash
 brew install llama.cpp
 pip install huggingface_hub
-hf download tiiuae/Falcon-
-llama-cli ./Falcon-
+hf download tiiuae/Falcon-H1-Tiny-R-0.6B-GGUF Falcon-H1-Tiny-R-0.6B-Q8_0.gguf --local-dir ./
+llama-cli ./Falcon-H1-Tiny-R-0.6B-Q8_0.gguf -cnv
 ```
 
 ### `ollama`
 
 ```bash
-ollama run hf.co/tiiuae/Falcon-
+ollama run hf.co/tiiuae/Falcon-H1-Tiny-R-0.6B-GGUF:Q8_0
 ```
 
 ### Apple `mlx`
@@ -96,14 +96,14 @@ For vLLM, simply start a server by executing the command below:
 
 ```bash
 # pip install vllm>=0.9.0
-vllm serve tiiuae/Falcon-
+vllm serve tiiuae/Falcon-H1-Tiny-R-0.6B --tensor-parallel-size 2 --data-parallel-size 1
 ```
 
 ### sglang
 
 ```bash
 python -m sglang.launch_server \
-  --model ttiiuae/Falcon-
+  --model tiiuae/Falcon-H1-Tiny-R-0.6B \
   --tensor-parallel-size 1
 ```
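For reference, the first hunk shows only a slice of the transformers snippet that this commit updates. A minimal end-to-end sketch of the intended usage, reusing the model ID from the diff; the dtype, device placement, generation settings, and prompt are illustrative assumptions, not part of the commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tiiuae/Falcon-H1-Tiny-R-0.6B"  # model ID as set by this commit

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumption: the diff truncates these kwargs
    device_map="auto",
)

# Build a chat-formatted prompt and generate a short reply.
messages = [{"role": "user", "content": "Give a one-line summary of state-space models."}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```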
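The `hf download` step in the `llama.cpp` hunk also has a Python equivalent through `huggingface_hub`, which the same hunk already installs. A small sketch, reusing the repo and filename exactly as they appear in the updated commands:

```python
from huggingface_hub import hf_hub_download

# Fetch the Q8_0 GGUF file referenced by the updated llama.cpp instructions.
gguf_path = hf_hub_download(
    repo_id="tiiuae/Falcon-H1-Tiny-R-0.6B-GGUF",
    filename="Falcon-H1-Tiny-R-0.6B-Q8_0.gguf",
    local_dir="./",
)
print(gguf_path)  # pass this path to `llama-cli ... -cnv`
```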
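Both `transformers serve` and `vllm serve` expose an OpenAI-compatible HTTP API, so the updated server commands can be smoke-tested with a standard client. A sketch assuming vLLM's default address (`http://localhost:8000/v1`) and a dummy API key; adjust the base URL if your server binds elsewhere:

```python
from openai import OpenAI

# Point the client at the locally running server; the key is unused but required.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="tiiuae/Falcon-H1-Tiny-R-0.6B",
    messages=[{"role": "user", "content": "Hello, Falcon!"}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```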