Update README.md
Browse files
README.md
CHANGED
|
@@ -83,10 +83,11 @@ new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
|
|
| 83 |
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
|
| 84 |
```
|
| 85 |
## IF YOU WANT TO TALK IN A LONG CONVERSATION
|
| 86 |
-
```
|
| 87 |
from peft import PeftModel
|
| 88 |
import torch
|
| 89 |
-
|
|
|
|
| 90 |
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
|
| 91 |
LORA_PATH = "lifatsastain/teach_lora1"
|
| 92 |
|
|
@@ -116,14 +117,14 @@ conversation_history = []
|
|
| 116 |
|
| 117 |
def chat(user_message):
|
| 118 |
conversation_history.append({"role": "user", "content": user_message})
|
| 119 |
-
|
| 120 |
messages = [{"role": "system", "content": SYSTEM}] + conversation_history
|
| 121 |
-
|
| 122 |
text = tokenizer.apply_chat_template(
|
| 123 |
messages, tokenize=False, add_generation_prompt=True
|
| 124 |
)
|
| 125 |
-
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 126 |
-
|
| 127 |
with torch.no_grad():
|
| 128 |
output_ids = model.generate(
|
| 129 |
**inputs,
|
|
@@ -133,10 +134,10 @@ def chat(user_message):
|
|
| 133 |
do_sample=True,
|
| 134 |
pad_token_id=tokenizer.eos_token_id
|
| 135 |
)
|
| 136 |
-
|
| 137 |
new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
|
| 138 |
response = tokenizer.decode(new_tokens, skip_special_tokens=True)
|
| 139 |
-
|
| 140 |
conversation_history.append({"role": "assistant", "content": response})
|
| 141 |
return response
|
| 142 |
|
|
@@ -155,6 +156,7 @@ while True:
|
|
| 155 |
continue
|
| 156 |
response = chat(user_input)
|
| 157 |
print(f"\nTutor: {response}\n")
|
|
|
|
| 158 |
'''
|
| 159 |
|
| 160 |
|
|
@@ -174,11 +176,12 @@ while True:
|
|
| 174 |
| Max sequence length | 512 |
|
| 175 |
| Quantization | 4-bit NF4 |
|
| 176 |
| Optimizer | paged_adamw_8bit |
|
|
|
|
| 177 |
|
| 178 |
### Framework Versions
|
| 179 |
-
|
| 180 |
-
-
|
| 181 |
-
-
|
| 182 |
-
-
|
| 183 |
-
-
|
| 184 |
-
-
|
|
|
|
| 83 |
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
|
| 84 |
```
|
| 85 |
## IF YOU WANT TO TALK IN A LONG CONVERSATION
|
| 86 |
+
```python
|
| 87 |
from peft import PeftModel
|
| 88 |
import torch
|
| 89 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 90 |
+
import bitsandbytes
|
| 91 |
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
|
| 92 |
LORA_PATH = "lifatsastain/teach_lora1"
|
| 93 |
|
|
|
|
| 117 |
|
| 118 |
def chat(user_message):
|
| 119 |
conversation_history.append({"role": "user", "content": user_message})
|
| 120 |
+
|
| 121 |
messages = [{"role": "system", "content": SYSTEM}] + conversation_history
|
| 122 |
+
|
| 123 |
text = tokenizer.apply_chat_template(
|
| 124 |
messages, tokenize=False, add_generation_prompt=True
|
| 125 |
)
|
| 126 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 127 |
+
|
| 128 |
with torch.no_grad():
|
| 129 |
output_ids = model.generate(
|
| 130 |
**inputs,
|
|
|
|
| 134 |
do_sample=True,
|
| 135 |
pad_token_id=tokenizer.eos_token_id
|
| 136 |
)
|
| 137 |
+
|
| 138 |
new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
|
| 139 |
response = tokenizer.decode(new_tokens, skip_special_tokens=True)
|
| 140 |
+
|
| 141 |
conversation_history.append({"role": "assistant", "content": response})
|
| 142 |
return response
|
| 143 |
|
|
|
|
| 156 |
continue
|
| 157 |
response = chat(user_input)
|
| 158 |
print(f"\nTutor: {response}\n")
|
| 159 |
+
|
| 160 |
'''
|
| 161 |
|
| 162 |
|
|
|
|
| 176 |
| Max sequence length | 512 |
|
| 177 |
| Quantization | 4-bit NF4 |
|
| 178 |
| Optimizer | paged_adamw_8bit |
|
| 179 |
+
|----------------------------------------------------------
|
| 180 |
|
| 181 |
### Framework Versions
|
| 182 |
+
- transformers: 5.3.0
|
| 183 |
+
- bitsandbytes: 0.49.2
|
| 184 |
+
- peft: 0.18.1
|
| 185 |
+
- torch: 2.10.0+cu126
|
| 186 |
+
- trl: 0.29.0
|
| 187 |
+
- datasets: 4.7.0
|