---
base_model: Qwen/Qwen2.5-7B-Instruct
tags:
- text-generation-inference
- transformers
- qwen2
- trl
- sft
license: apache-2.0
language:
- zho
- eng
- fra
- spa
- por
- deu
- ita
- rus
- jpn
- kor
- vie
- tha
- ara
---

### Model details

- More natural, smarter reasoning
- Trained without a system prompt
- LoRA fine-tuning with rank 16 and alpha 16 (a hedged config sketch follows this list)
- Tool calling support (see the Tool calling sketch below)
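
For reference, a LoRA setup with these hyperparameters might look like the sketch below using the PEFT library. This is an illustration only, not the actual training code: the target modules and dropout are assumptions, since the card does not publish them.

```
from peft import LoraConfig

# Sketch of a LoRA config matching the stated rank and alpha.
# target_modules and lora_dropout are assumptions, not taken from
# the actual training run.
lora_config = LoraConfig(
    r=16,                 # LoRA rank, as stated on the card
    lora_alpha=16,        # LoRA alpha, as stated on the card
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed
    lora_dropout=0.05,    # assumed
    task_type="CAUSAL_LM",
)
```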
### Usage

The snippet below runs an interactive chat loop: it first generates a reasoning trace, appends it to the conversation as a `reasoning` message, then generates the final answer conditioned on that trace.
```
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

MAX_REASONING_TOKENS = 4096
MAX_RESPONSE_TOKENS = 1024

model_name = "beyoru/ThinkAgain1.5"

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

messages = []

def stream_output(output_text):
    # Prints an already-generated string character by character;
    # see the note below for true token-by-token streaming.
    for char in output_text:
        print(char, end="", flush=True)

while True:
    prompt = input("USER: ")
    messages.append({"role": "user", "content": prompt})

    # Generate the reasoning trace first.
    reasoning_template = tokenizer.apply_chat_template(messages, tokenize=False, add_reasoning_prompt=True)
    reasoning_inputs = tokenizer(reasoning_template, return_tensors="pt").to(model.device)
    reasoning_ids = model.generate(**reasoning_inputs, max_new_tokens=MAX_REASONING_TOKENS)
    # Decode only the newly generated tokens, not the prompt.
    reasoning_output = tokenizer.decode(reasoning_ids[0, reasoning_inputs.input_ids.shape[1]:], skip_special_tokens=True)

    messages.append({"role": "reasoning", "content": reasoning_output})

    print("REASONING: ", end="")
    stream_output(reasoning_output)
    print()

    # Generate the final answer conditioned on the reasoning trace.
    response_template = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    response_inputs = tokenizer(response_template, return_tensors="pt").to(model.device)
    response_ids = model.generate(**response_inputs, max_new_tokens=MAX_RESPONSE_TOKENS)
    response_output = tokenizer.decode(response_ids[0, response_inputs.input_ids.shape[1]:], skip_special_tokens=True)

    messages.append({"role": "assistant", "content": response_output})

    print("ASSISTANT: ", end="")
    stream_output(response_output)
    print()
```
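
Note that `stream_output` only prints text after generation has finished. For genuine token-by-token streaming you could instead pass a `TextStreamer` to `generate`; a minimal sketch:

```
from transformers import TextStreamer

# Streams decoded tokens to stdout as they are generated; skip_prompt
# avoids re-printing the input template.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
_ = model.generate(**response_inputs, max_new_tokens=MAX_RESPONSE_TOKENS, streamer=streamer)
```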
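
### Tool calling

The card advertises tool calling, but the loop above does not exercise it. Below is a minimal sketch, assuming the model keeps the Qwen2.5 tool-calling chat template and a transformers version that accepts the `tools=` argument of `apply_chat_template`; `get_weather` is a hypothetical tool used only for illustration.

```
def get_weather(city: str):
    """Get the current weather for a city.

    Args:
        city: Name of the city to look up.
    """
    # Hypothetical tool; a real implementation would call a weather API.
    return f"Sunny in {city}"

tool_messages = [{"role": "user", "content": "What's the weather in Hanoi?"}]
tool_template = tokenizer.apply_chat_template(
    tool_messages, tools=[get_weather], add_generation_prompt=True, tokenize=False
)
tool_inputs = tokenizer(tool_template, return_tensors="pt").to(model.device)
tool_ids = model.generate(**tool_inputs, max_new_tokens=MAX_RESPONSE_TOKENS)
print(tokenizer.decode(tool_ids[0, tool_inputs.input_ids.shape[1]:], skip_special_tokens=True))
```

The model should then emit a structured tool call; parsing it, running the function, and appending a `tool` message back into the conversation follows the standard transformers tool-use flow.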