Spaces:
Runtime error
Runtime error
| import io | |
| import base64 | |
| import os | |
| import requests | |
| from PIL import Image | |
| from smolagents import tool, OpenAIServerModel | |
| from tools import get_file_content | |
def encode_image(image_bytes: bytes, new_size: int = 512) -> str:
    """Scale an image so its longer side is ``new_size`` pixels and return it as base64 JPEG.

    The image is decoded from ``image_bytes``, converted to RGB, resized with
    its aspect ratio preserved (smaller images are scaled up, larger ones
    down), re-encoded as JPEG and base64-encoded.

    Args:
        image_bytes: Raw bytes of an image file in any format Pillow can open.
        new_size: Target length, in pixels, of the longer side.

    Returns:
        The resized JPEG as a base64 string (UTF-8 decoded).
    """
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    original_width, original_height = image.size

    # Scale relative to the longer side so neither dimension exceeds new_size.
    ratio = new_size / max(original_width, original_height)

    # int() truncates, so on a very elongated image the shorter side can
    # come out as 0, which resize() rejects; keep at least one pixel.
    new_width = max(1, int(original_width * ratio))
    new_height = max(1, int(original_height * ratio))

    resized_image = image.resize((new_width, new_height))
    buffered = io.BytesIO()
    resized_image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def download_image(task_id: str, api_url: str) -> str:
    """Fetch the file attached to a task and return it as a base64 JPEG string.

    Args:
        task_id: Identifier passed to ``get_file_content`` to select the file.
        api_url: Base URL passed to ``get_file_content``.

    Returns:
        The downloaded image, resized and base64-encoded by ``encode_image``.
    """
    # NOTE(review): get_file_content is assumed to return a response-like
    # object whose ``.content`` holds the raw image bytes -- confirm in tools.
    response = get_file_content(task_id, api_url)
    return encode_image(response.content)
def call_vision_llm(user_query: str, file_id: str, file_url: str) -> str:
    """
    Fetch an image and ask a vision-capable LLM to answer a query about it.

    The image is obtained through download_image(file_id, file_url), embedded
    in the prompt as a base64 JPEG data URL, and sent together with the user
    query to the 'gpt-4o-mini' model.

    Args:
        user_query: User request on image.
        file_id: Identifier of the image file, forwarded to download_image.
        file_url: Base URL used for the download, forwarded to download_image.

    Returns:
        The content of the model's reply.
    """
    image_b64 = download_image(file_id, file_url)

    # The API key is read from the environment on every call.
    model = OpenAIServerModel(
        api_key=os.getenv('OPENAI_API_KEY'),
        model_id='gpt-4o-mini',
        temperature=0,
    )

    text_part = {"type": "text", "text": user_query}
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{image_b64}",
            "detail": "low",
        },
    }

    # One user turn carrying both the question and the image.
    reply = model([{"role": "user", "content": [text_part, image_part]}])
    return reply.content