{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "6511a91c-ed20-41ff-befb-699bda1912a3", "metadata": { "execution": { "iopub.execute_input": "2026-03-25T05:42:29.023013Z", "iopub.status.busy": "2026-03-25T05:42:29.022863Z", "iopub.status.idle": "2026-03-25T05:42:40.880280Z", "shell.execute_reply": "2026-03-25T05:42:40.879248Z", "shell.execute_reply.started": "2026-03-25T05:42:29.022998Z" }, "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8310548c3b0d460899adcb96ee4af2e1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (incomplete total...): 0.00B [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "663ea1161c934235a53948b93d224495", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Fetching 2 files: 0%| | 0/2 [00:00 and <|vision_end|> tags tell the processor \n", "# where to inject the image features.\n", "prompt = (\n", " \"<|im_start|>user\\n\"\n", " \"<|vision_start|><|image_pad|><|vision_end|>\"\n", " f\"{user_query}<|im_end|>\\n\"\n", " \"<|im_start|>assistant\\n\"\n", ")\n", "\n", "# 4. Process the vision information\n", "# We still use this utility to fetch the image and handle resizing logic\n", "messages = [{\"role\": \"user\", \"content\": [{\"type\": \"image\", \"image\": image_url}]}]\n", "image_inputs, _ = process_vision_info(messages)\n", "\n", "# 5. Tokenize and Prepare Tensors\n", "inputs = processor(\n", " text=[prompt],\n", " images=image_inputs,\n", " videos=None,\n", " padding=True,\n", " return_tensors=\"pt\",\n", ")\n", "inputs = inputs.to(model.device)\n", "\n", "# 6. 
Generate\n", "generated_ids = model.generate(**inputs, max_new_tokens=100)\n", "\n", "# Trim the prompt tokens from the result\n", "generated_ids_trimmed = [\n", " out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)\n", "]\n", "\n", "output_text = processor.batch_decode(\n", " generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False\n", ")\n", "\n", "print(f\"\\nManual Prompt Response: {output_text[0]}\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "f45df021-6302-4f47-9e06-8070577885a2", "metadata": { "execution": { "iopub.execute_input": "2026-03-25T04:36:13.766580Z", "iopub.status.busy": "2026-03-25T04:36:13.766400Z", "iopub.status.idle": "2026-03-25T04:36:13.770145Z", "shell.execute_reply": "2026-03-25T04:36:13.769588Z", "shell.execute_reply.started": "2026-03-25T04:36:13.766563Z" } }, "outputs": [ { "data": { "text/plain": [ "'<|im_start|>user\\n<|vision_start|><|image_pad|><|vision_end|>Describe the image<|im_end|>\\n<|im_start|>assistant\\n'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prompt" ] }, { "cell_type": "code", "execution_count": 2, "id": "504fa71b-42b4-4f53-8988-25fcfba38d13", "metadata": { "execution": { "iopub.execute_input": "2026-03-25T05:43:53.839325Z", "iopub.status.busy": "2026-03-25T05:43:53.839044Z", "iopub.status.idle": "2026-03-25T05:43:53.843214Z", "shell.execute_reply": "2026-03-25T05:43:53.842555Z", "shell.execute_reply.started": "2026-03-25T05:43:53.839304Z" } }, "outputs": [], "source": [ "# Load the tensor previously dumped to cos.pt (the cell that writes it is not part of this notebook).\n", "# weights_only=True restricts unpickling to tensors and plain containers, so a tampered\n", "# cos.pt cannot execute arbitrary code on load; a plain tensor payload loads unchanged.\n", "cos = torch.load('cos.pt', weights_only=True)" ] }, { "cell_type": "code", "execution_count": 3, "id": "642d9dcf-e591-4d70-96af-b69bf955d9e1", "metadata": { "execution": { "iopub.execute_input": "2026-03-25T05:43:54.296041Z", "iopub.status.busy": "2026-03-25T05:43:54.295869Z", "iopub.status.idle": "2026-03-25T05:43:54.299276Z", "shell.execute_reply": "2026-03-25T05:43:54.298634Z", "shell.execute_reply.started": "2026-03-25T05:43:54.296029Z" } }, 
"outputs": [ { "data": { "text/plain": [ "(torch.Size([1, 1, 94, 128]), torch.float16)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cos.shape, cos.dtype" ] }, { "cell_type": "code", "execution_count": null, "id": "f44460e3-58e9-4fd2-898a-06e8a00f9365", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 5 }