Update README.md
Browse files
README.md
CHANGED
|
@@ -132,38 +132,40 @@ We tested **Next2 0.8B** against its base model and other models in the sub-2B c
|
|
| 132 |
You can easily run **Next2 0.8B** on almost any machine with Python installed. Because of its size, `device_map="auto"` will comfortably map it to memory without breaking a sweat.
|
| 133 |
|
| 134 |
```python
|
| 135 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
|
| 136 |
import torch
|
| 137 |
|
| 138 |
-
model_id = "
|
| 139 |
|
| 140 |
-
|
|
|
|
| 141 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
|
|
|
| 152 |
]
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
# Generate with Thinking Mode optimal parameters
|
| 158 |
-
outputs = model.generate(
|
| 159 |
-
**inputs,
|
| 160 |
-
max_new_tokens=512,
|
| 161 |
-
temperature=0.6,
|
| 162 |
-
top_p=0.95,
|
| 163 |
-
repetition_penalty=1.1
|
| 164 |
-
)
|
| 165 |
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
```
|
| 168 |
|
| 169 |
---
|
|
|
|
| 132 |
You can easily run **Next2 0.8B** on almost any machine with Python installed. Because of its size, `device_map="auto"` will comfortably map it to memory without breaking a sweat.
|
| 133 |
|
| 134 |
```python
|
| 135 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
|
| 136 |
+
from PIL import Image
|
| 137 |
import torch
|
| 138 |
|
| 139 |
+
model_id = "thelamapi/next2-0.8b"
|
| 140 |
|
| 141 |
+
model = AutoModelForCausalLM.from_pretrained(model_id)
|
| 142 |
+
processor = AutoProcessor.from_pretrained(model_id) # For vision.
|
| 143 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
# Create a message in chat format
|
| 147 |
+
messages = [
|
| 148 |
+
{"role": "system","content": [{"type": "text", "text": "You are Next2.5, a smart and concise AI assistant trained by Lamapi. Always respond in the user's language. Proudly made in Turkey."}]},
|
| 149 |
+
|
| 150 |
+
{
|
| 151 |
+
"role": "user","content": [
|
| 152 |
+
{"type": "text", "text": "Write a highly optimized Rust function to calculate the Fibonacci sequence using memoization"}
|
| 153 |
+
]
|
| 154 |
+
}
|
| 155 |
]
|
| 156 |
|
| 157 |
+
# Prepare the input: render the chat template, then tokenize with the processor
|
| 158 |
+
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 159 |
+
inputs = processor(text=prompt, return_tensors="pt")
|
| 160 |
+
|
| 161 |
+
# Remove 'mm_token_type_ids' if it's not needed for text-only generation
|
| 162 |
+
if "mm_token_type_ids" in inputs:
|
| 163 |
+
del inputs["mm_token_type_ids"]
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
+
# Generate a response and decode it to text
|
| 167 |
+
output = model.generate(**inputs, do_sample=True, temperature=0.7, max_new_tokens=128)
|
| 168 |
+
print(tokenizer.decode(output[0], skip_special_tokens=True))
|
| 169 |
```
|
| 170 |
|
| 171 |
---
|