Update README.md
4-bit quantized inference
README.md CHANGED
````diff
@@ -121,15 +121,32 @@ The model was fine-tuned on a custom dataset (`data.jsonl`) consisting of:
 ## Example Usage
 
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import torch
 
-
-
+model_id = "techpro-saida/msci_software_engineering_slm_v1"
+
+# 4-bit config for efficient inference
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4",
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=bnb_config,
+    device_map="auto",  # automatically balances between GPU/CPU
+)
 
 prompt = "Explain SOLID principles in OOP?"
-inputs = tokenizer(prompt, return_tensors="pt")
-
+inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+
+outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.7, top_p=0.9)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+
 ```
 
 ## Developer
````
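One practical note on the committed example: with `device_map="auto"`, `transformers` decides where the weights live, so hard-coding `.to("cuda")` for the inputs only works when the first layers land on a GPU, and `temperature`/`top_p` are ignored by `generate()` unless sampling is enabled. The sketch below is a slightly hardened variant of the README snippet, not part of the commit; the deviations (`model.device`, `do_sample=True`, the `get_memory_footprint()` printout) are marked in comments.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_id = "techpro-saida/msci_software_engineering_slm_v1"

# Same 4-bit NF4 configuration as in the diff above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Deviation: report the quantized weight footprint (NF4 stores weights in
# ~4 bits, roughly a quarter of the fp16 size plus quantization constants)
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")

prompt = "Explain SOLID principles in OOP?"
# Deviation: follow the model's actual placement instead of hard-coding "cuda"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,  # deviation: without this, temperature/top_p are ignored
    temperature=0.7,
    top_p=0.9,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```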