Spaces:

Isics
/

agents_gaia

Runtime error

agents_gaia / tools /vision_tools.py

initial commit

32844c7 4 months ago

1.61 kB

	import base64

	import requests
	from config import IP_WINDOWS
	import ollama
	from smolagents import tool


	@tool
	def analyze_image(image_path: str, question: str) -> str:
	    """Analyze an image using a local vision model and answer a question about it.

	    Use this tool when you need to extract information from a jpg/png file.

	    Args:
	        image_path: The local path to the image file (e.g. 'images/grafico.png').
	        question: The specific question about what to look for in the image (e.g. 'What value is the red bar?').

	    Returns:
	        str: The answer to the question, based on the image. If the image
	            cannot be read or the model call fails, an error message
	            describing the failure is returned instead of raising.

	    Example:
	        >>> result = analyze_image("images/grafico.png", "What value is the red bar?")
	    """
	    # Read and base64-encode the image first, in its own try block, so that a
	    # missing or unreadable file is not misreported as a connection problem.
	    try:
	        with open(image_path, "rb") as image_file:
	            img_str = base64.b64encode(image_file.read()).decode("utf-8")
	    except Exception as e:
	        return f"Error reading image '{image_path}': {e}"

	    # NOTE(review): ollama.chat() talks to the client's default host; the
	    # imported IP_WINDOWS is not used here. Confirm whether the model is
	    # expected to run on the remote Windows machine.
	    try:
	        response = ollama.chat(
	            model='llava',
	            messages=[{
	                'role': 'user',
	                'content': question,
	                # Must be a list of images; send the encoded payload, not
	                # the (already closed) file handle.
	                'images': [img_str],
	            }],
	        )
	        # The chat response carries a single 'message' entry.
	        return response['message']['content']
	    except Exception as e:
	        # Tools report failures as text so the calling agent can react to them.
	        return f"Error conectando con Windows: {str(e)}"