from consts import REASONING_START, REASONING_END, SOLUTION_START, SOLUTION_END
from transformers import TextStreamer
from unsloth import FastVisionModel


def inference(idx: int, model, dataset, tokenizer):
    """Run streamed generation for one vision-QA sample of *dataset*.

    Builds a single-turn chat prompt from the sample's question plus the
    project's reasoning/solution sentinel tokens, feeds the sample image and
    the rendered prompt through *tokenizer*, and generates up to 128 new
    tokens, streaming them to stdout as they arrive.

    Args:
        idx: Index of the sample in *dataset*.
        model: An unsloth FastVisionModel (switched to inference mode here).
        dataset: Indexable dataset whose items expose ``"decoded_image"`` and
            ``"question"`` keys.
        tokenizer: The matching multimodal processor/tokenizer.

    Returns:
        The tensor of generated token ids from ``model.generate``.
    """
    # Enable unsloth's fast inference mode (disables training-only paths).
    FastVisionModel.for_inference(model)

    image = dataset[idx]["decoded_image"]
    # Hoisted into a local: a double-quoted subscript inside a double-quoted
    # f-string is a SyntaxError on Python < 3.12 (PEP 701 relaxed this only
    # from 3.12 onward).
    question = dataset[idx]["question"]
    instruction = (
        f"{question}, provide your reasoning between {REASONING_START} and {REASONING_END} "
        f"and then your final answer between {SOLUTION_START} and (put a float here) {SOLUTION_END}"
    )

    messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": instruction}],
        }
    ]

    # Render the chat template into the model's prompt format.
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")  # NOTE(review): assumes a CUDA device is available — confirm

    # Stream decoded tokens to stdout as they are produced; skip_prompt avoids
    # echoing the input prompt back.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    result = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
    )
    return result