File size: 2,662 Bytes
b0c0df0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
from typing import List
from PIL import Image
# System prompt for the reasoning mode.  It tells the model that it may emit
# Python wrapped in <code>...</code> with a ```python fence, that the code is
# run by an external sandbox, and that results are returned inside
# <sandbox_output></sandbox_output> tags.
#
# Every entry of the tuple below is exactly one line of the final prompt; the
# entries are joined with "\n", so the empty entries yield the blank separator
# lines and the last entry is not followed by a newline.
REASONING_SYS_PROMPT = "\n".join(
    (
        "You are a helpful assistant.",
        "",
        (
            "Solve the following problem step by step, and optionally write "
            "Python code for image manipulation to enhance your reasoning "
            "process. The Python code will be executed by an external sandbox, "
            "and the processed image or result (wrapped in "
            "<sandbox_output></sandbox_output>) can be returned to aid your "
            "reasoning and help you arrive at the final answer."
        ),
        "",
        "**Reasoning & Image Manipulation (Optional but Encouraged):**",
        (
            " * You have the capability to write executable Python code to "
            "perform image manipulations (e.g., cropping to a Region of "
            "Interest (ROI), resizing, rotation, adjusting contrast) or "
            "perform calculation for better reasoning."
        ),
        (
            " * The code will be executed in a secure sandbox, and its output "
            "will be provided back to you for further analysis."
        ),
        " * All Python code snippets **must** be wrapped as follows:",
        " <code>",
        " ```python",
        " # your code.",
        " ```",
        " </code>",
        (
            " * At the end of the code, print the path of the processed "
            "image (processed_path) or the result for further processing in "
            "a sandbox environment."
        ),
    )
)

# Minimal system prompt with no tool-use or formatting instructions.
SIMPLE_SYS_PROMPT = "You are a helpful assistant."
def generate_prompt_simple_qa(user_question: str) -> str:
    """Return a direct-answer VQA prompt for ``user_question``.

    The prompt instructs the model to skip image manipulation and reasoning
    and to answer immediately.  The question is interpolated verbatim, wrapped
    in double quotes, on a final line labelled ``**User's Question:**``.

    Args:
        user_question: The question text to embed in the prompt.

    Returns:
        The two-part prompt: the fixed instruction sentence(s), a newline, and
        the quoted question line (no trailing newline).
    """
    instructions = (
        "You are an advanced AI assistant specializing in visual question "
        "answering (VQA). You don't need to perform any image manipulation "
        "or reasoning. Give the answer to the following question directly."
    )
    question_line = f'**User\'s Question:** "{user_question}"'
    return "\n".join((instructions, question_line))
def generate_prompt_final_qa(user_question: str, user_image_path: str) -> str:
    """Build the reasoning-mode VQA prompt, including image path and size.

    The image at ``user_image_path`` is opened with Pillow only to read its
    dimensions, which are reported as ``"<width>x<height>"``.  Reading the
    size is best-effort: if the file cannot be opened or read, a fixed
    placeholder message is embedded instead and no exception is raised.

    Args:
        user_question: The question text placed right after the ``<image>``
            placeholder line.
        user_image_path: Path of the image; embedded verbatim in the prompt
            and used to look up the image size.

    Returns:
        The prompt text, ending with a newline, containing the question, the
        image path and size, and the required ``<think>``/``<answer>`` output
        format.
    """
    try:
        with Image.open(user_image_path) as img:
            user_image_size = f"{img.width}x{img.height}"
    except OSError:
        # OSError alone covers the cases the original tuple listed:
        # FileNotFoundError is a subclass and IOError is an alias of OSError.
        user_image_size = "Unable to determine (error reading image)"

    # Each literal below is one line of the prompt, newline included.
    return (
        "<image>\n"
        f"{user_question}\n"
        f'### User Image Path: "{user_image_path}"\n'
        f'### User Image Size: "{user_image_size}"\n'
        "### **Output Format (strict adherence required):**\n"
        "<think>Your detailed reasoning process, including any code, "
        "should go here.</think>\n"
        "<answer>Your final answer to the user's question goes here."
        "</answer>\n"
    )
# Closing tags that correspond to the </code> wrapper required by
# REASONING_SYS_PROMPT and the </answer> tag required by the output format in
# generate_prompt_final_qa.
# NOTE(review): presumably used by callers as stop/marker strings during
# generation; nothing in this file reads the list, so confirm against callers.
SPECIAL_STRING_LIST: List[str] = ["</code>", "</answer>"]
|