## Usage

This model was fine-tuned using PEFT/LoRA, with low-rank adapters applied to the linear layers of the base model.
# Install the pinned dependency versions used when this adapter was trained,
# so the checkpoint loads reproducibly (run in a notebook cell).
! pip install \
transformers==5.3.0 \
datasets==4.0.0 \
peft==0.18.1 \
trl==0.29.0 \
bitsandbytes==0.49.2 \
accelerate==1.13.0
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
# Base checkpoint the LoRA adapter was trained on.
# NOTE(review): `model_name` was previously undefined (NameError); assumed to
# match the tokenizer checkpoint loaded below — confirm against the training config.
model_name = "Qwen/Qwen3.5-0.8B"

# Load the base model in bfloat16, sharding across available devices.
b_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the fine-tuned LoRA adapter weights on top of the base model.
fmodel = PeftModel.from_pretrained(
    b_model,
    "zoeeyys/qwen0.8-v0",
)

# Left padding so generated tokens append directly after the prompt
# when batching decoder-only inputs.
gtokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3.5-0.8B",
    padding_side="left",
)

messages = [
    {"role": "user", "content": "hi,i want to career shift into software"}
]

# Render the chat template, tokenize, and move tensors to the model's device.
inputs = gtokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(fmodel.device)

# Generate a completion; pad with EOS since the tokenizer may define no pad token.
outputs = fmodel.generate(
    **inputs,
    max_new_tokens=2048,
    eos_token_id=gtokenizer.eos_token_id,
    pad_token_id=gtokenizer.eos_token_id,
)

# Decode only the newly generated tokens (slice off the prompt portion).
print(gtokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:],
    skip_special_tokens=True,
))
Downloads last month: 46