| from consts import REASONING_START, REASONING_END, SOLUTION_START, SOLUTION_END | |
| from transformers import TextStreamer | |
| from unsloth import FastVisionModel | |
def inference(idx: int, model, dataset, tokenizer, *, max_new_tokens: int = 128):
    """Generate and stream the model's answer for a single dataset example.

    The example's question is wrapped in an instruction that asks for the
    reasoning between ``REASONING_START``/``REASONING_END`` and the final
    answer between ``SOLUTION_START``/``SOLUTION_END``. The instruction is
    sent together with the example's image as one user chat message.
    Generated text is echoed as it is produced through a ``TextStreamer``
    (the prompt itself is skipped).

    Args:
        idx: Index of the example to read from ``dataset``.
        model: Model to generate with; it is switched to inference mode via
            ``FastVisionModel.for_inference`` before generation.
        dataset: Indexable collection whose items expose the keys
            ``"decoded_image"`` and ``"question"``.
        tokenizer: Object used to render the chat template, encode the
            image/text pair, and decode streamed tokens.
            NOTE(review): presumably a multimodal processor, since it is
            called with an image and a text argument - confirm.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        Whatever ``model.generate`` returns for the encoded inputs.
    """
    FastVisionModel.for_inference(model)

    # Read the row once instead of indexing the dataset for every field.
    example = dataset[idx]
    image = example["decoded_image"]
    # Pull the question into a local first: nesting the same quote type
    # inside an f-string is a SyntaxError before Python 3.12.
    question = example["question"]
    instruction = (
        f"{question}, provide your reasoning between {REASONING_START} and {REASONING_END} "
        f"and then your final answer between {SOLUTION_START} and (put a float here) {SOLUTION_END}"
    )

    # The bare {"type": "image"} entry is the image slot in the chat
    # template; the actual image is passed to the tokenizer call below.
    messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": instruction}],
        }
    ]
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # add_special_tokens=False: presumably the chat template has already
    # inserted the special tokens - TODO confirm.
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    # NOTE(review): temperature/top_p/top_k only influence decoding when
    # sampling is enabled; do_sample is not passed here, so this relies on
    # the model's generation config - confirm that is intended.
    result = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
    )
    return result