Final_Assignment_Template

Runtime error

App Files Files Community

Final_Assignment_Template / vision_llm.py

silasyl

Initial commit with LFS-tracked files

ecbc0b3 about 1 year ago

Raw

History Blame Contribute Delete

2.34 kB

	import io
	import base64
	import os
	import requests
	from PIL import Image
	from smolagents import tool, OpenAIServerModel
	from tools import get_file_content


	def encode_image(image_bytes: bytes, new_size=512):
	    """Rescale an image so its longer side is ``new_size`` px and return it as base64 JPEG.

	    The image is decoded from ``image_bytes``, converted to RGB, resized with
	    its aspect ratio preserved, and re-encoded as JPEG. Images smaller than
	    ``new_size`` are scaled up, larger ones are scaled down.

	    Args:
	        image_bytes: Raw bytes of an image file that Pillow can open.
	        new_size: Target length, in pixels, of the longer side.

	    Returns:
	        The JPEG-encoded resized image as a base64 string.
	    """
	    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
	    original_width, original_height = image.size

	    # Scale relative to the longer side so neither dimension exceeds new_size.
	    ratio = new_size / max(original_width, original_height)

	    # int() truncates, so a very thin or very flat image could end up with a
	    # 0-pixel side, which cannot be resized/saved; keep at least one pixel.
	    new_width = max(1, int(original_width * ratio))
	    new_height = max(1, int(original_height * ratio))

	    resized_image = image.resize((new_width, new_height))
	    buffered = io.BytesIO()
	    resized_image.save(buffered, format='JPEG')
	    return base64.b64encode(buffered.getvalue()).decode('utf-8')

	def download_image(task_id: str, api_url: str) -> str:
	    """Fetch the file attached to a task and return it as a base64 JPEG string.

	    Args:
	        task_id: Identifier of the task whose file is fetched; forwarded to
	            ``get_file_content``.
	        api_url: API location forwarded to ``get_file_content``.

	    Returns:
	        The base64 string produced by ``encode_image`` from the fetched bytes.
	    """
	    # NOTE(review): get_file_content is assumed to return a response-like
	    # object exposing the raw bytes as ``.content`` -- confirm in tools.py.
	    response = get_file_content(task_id, api_url)
	    return encode_image(response.content)


	@tool
	def call_vision_llm(user_query: str, file_id: str, file_url: str) -> str:
	    """
	    Downloads the image using the file_id and file_url, then analyzes it using a vision-based LLM, following user query.

	    Args:
	        user_query: User request on image.
	        file_id: metadata required to download the image.
	        file_url: metadata required to download the image.
	    """
	    # NOTE: the docstring above is read by the smolagents @tool decorator to
	    # describe this tool and its arguments, so its wording is left unchanged.

	    # Fetch the image first; it comes back already base64-encoded as JPEG.
	    image_b64 = download_image(file_id, file_url)

	    # The user message has two parts: the question and the image as a data URL.
	    text_part = {"type": "text", "text": user_query}
	    image_part = {
	        "type": "image_url",
	        "image_url": {
	            "url": f"data:image/jpeg;base64,{image_b64}",
	            "detail": "low",
	        },
	    }

	    # The API key is read from the environment on every call.
	    vision_model = OpenAIServerModel(
	        api_key=os.getenv('OPENAI_API_KEY'),
	        model_id='gpt-4o-mini',
	        temperature=0,
	    )

	    reply = vision_model([{"role": "user", "content": [text_part, image_part]}])
	    return reply.content