# ToriiGate-0.5 / scripts/transformers_ver.py
# Author: Minthy — uploaded via huggingface_hub (commit 9229b0a, verified).
from prompts import make_user_query, system_prompt
from transformers import (
Qwen3_5ForConditionalGeneration,
AutoProcessor,
)
from PIL import Image
import torch
# --- Configuration --------------------------------------------------------
MODEL_PATH = "M:/ai/qwen3.5_mm_trainer/Qwen3.5-4B-Base_k2"
DEVICE = 'cuda'

# Caption-generation options forwarded to make_user_query() below.
C_TYPE = 'long_thoughts_v2'
USE_NAMES = True
ADD_TAGS = False
ADD_CHAR_LIST = False
ADD_CHARS_TAGS = False
ADD_CHARS_DESCR = False

# --- Model / processor ----------------------------------------------------
# Load the multimodal model in bfloat16 with SDPA attention, mapped onto
# DEVICE in one step via device_map.
model = Qwen3_5ForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    attn_implementation="sdpa",
    device_map=DEVICE,
)

# The processor bundles the chat template and the image preprocessor;
# min_pixels bounds the smallest image resolution fed to the vision tower.
processor = AutoProcessor.from_pretrained(
    MODEL_PATH,
    min_pixels=256 * 32 * 32,
    padding_side="right",
)
def prepare_messages(item):
    """Build the two-turn chat message list (system + user) for one item.

    The user turn carries an image placeholder followed by the query text
    produced by make_user_query() using the module-level option flags.
    """
    query = make_user_query(
        item,
        C_TYPE, USE_NAMES, ADD_TAGS, ADD_CHAR_LIST,
        ADD_CHARS_TAGS, ADD_CHARS_DESCR,
    )
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": query},
        ],
    }
    return [system_turn, user_turn]
# --- Single-image inference ----------------------------------------------
# Fix: open the image in a context manager so the underlying file handle is
# released once preprocessing is done (PIL keeps the file open until
# close()/context exit, which leaked the descriptor in the original).
with Image.open('test_image.png') as img:
    images = [img]
    msgs = prepare_messages({})
    # Render the chat template to a prompt string with the generation cue.
    texts = [processor.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)]
    inputs = processor(text=texts, images=images, return_tensors="pt")

# Move every tensor in the batch onto the model's device.
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

# No gradients needed at inference time.
with torch.no_grad():
    generate_ids = model.generate(**inputs, max_new_tokens=1024)

# Slice off the prompt tokens so only the newly generated continuation is
# decoded; skip_special_tokens drops EOS/pad markers from the text.
generated_texts = processor.batch_decode(
    generate_ids[:, inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
)
print(generated_texts[0])