Upload README.md with huggingface_hub
README.md CHANGED

@@ -29,7 +29,7 @@ model_name = "nn-tech/MetalGPT-1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype="bfloat16",
+    torch_dtype="bfloat16",
     device_map="auto",
 )
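
For context, a minimal, self-contained sketch of how the loading snippet above is typically used end to end. The prompt text and generation settings below are illustrative assumptions, not taken from the README:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "nn-tech/MetalGPT-1"

# Load tokenizer and model as in the diff above; passing "bfloat16" as a string
# is accepted by recent transformers versions (torch.bfloat16 also works).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="bfloat16",
    device_map="auto",
)

# Illustrative prompt; the README's actual example may differ.
inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```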

@@ -46,8 +46,6 @@ text = tokenizer.apply_chat_template(
     messages,
     tokenize=False,
     add_generation_prompt=True,
-    # If the chat template supports a 'thinking' mode,
-    # the enable_thinking flag can be added or removed:
     # enable_thinking=False
 )

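The `enable_thinking` flag referenced above is only honored when the model's chat template actually defines it; extra keyword arguments to `apply_chat_template` are forwarded to the template. A sketch of how the prompt-building and generation step usually looks, assuming `tokenizer` and `model` are loaded as in the previous snippet (message content and token limits are illustrative):

```python
# Illustrative conversation; not taken from the README.
messages = [
    {"role": "user", "content": "Explain what bfloat16 is in one sentence."},
]

# Build the prompt string without tokenizing; extra keyword arguments such as
# enable_thinking are passed through to the chat template if it uses them.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    # enable_thinking=False
)

inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)

# Decode only the newly generated tokens.
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print(response)
```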
@@ -128,8 +126,6 @@ print(response)
 
 ### vLLM Usage
 
-Example usage via `vLLM` (see also `vllm_local.py` in the repository):
-
 ```python
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
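
The diff truncates the vLLM example right after its imports. A minimal sketch of how such an offline-inference example typically continues; the prompt and sampling values are illustrative assumptions, not taken from the README or `vllm_local.py`:

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "nn-tech/MetalGPT-1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build a chat prompt with the model's own template.
messages = [{"role": "user", "content": "Explain what bfloat16 is in one sentence."}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Illustrative sampling settings; the README may recommend different values.
sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=256)

llm = LLM(model=model_name)
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)
```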