# Lumintroll's picture
# Added tools. It's a me!
# 0a55bd6
from dotenv import load_dotenv
# Load a local .env file (python-dotenv) so the API keys looked up through
# os.environ.get() further down are available.
load_dotenv()
# Gates the Mistral chat request made later in this script.
use_mistral = False
# Switch intended for the Gemini request.
use_gemini = True
import base64
import requests
import os
from mistralai import Mistral
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string.

    This is a best-effort helper: any failure while opening, reading or
    encoding the file is reported with ``print`` and signalled to the caller
    by returning ``None`` rather than by raising.
    """
    encoded = None
    try:
        with open(image_path, "rb") as handle:
            raw = handle.read()
        encoded = base64.b64encode(raw).decode('utf-8')
    except FileNotFoundError:
        # Missing file gets its own, more specific message.
        print(f"Error: The file {image_path} was not found.")
    except Exception as err:
        # Everything else (permissions, bad path type, ...) is reported as-is.
        print(f"Error: {err}")
    return encoded
# Image that is sent to the vision models.
image_path = "chess_test.jpg"

# Base64 text of that image; None when encode_image could not read the file.
base64_image = encode_image(image_path)

# Mistral credentials are taken from the environment.
api_key = os.environ.get("API_KEY_MISTRAL")

# Model identifier passed to the Mistral chat call.
model = "pixtral-large-latest"

# Mistral API client.
client = Mistral(api_key=api_key)

# One user turn: the textual instructions followed by the image as a data URL.
messages = [
    dict(
        role="user",
        content=[
            dict(
                type="text",
                text=r"""Below is an image of a chess board mid-game. Only use the image as a reference for the response. NEVER use implicit knowledge of chess or positions.
The bottom left square is A1, the top right square is H8.
Identify the position of all pieces in JSON format: {colour:{piece_type:[coordinates]}}
Chess board diagram:""",
            ),
            dict(
                type="image_url",
                image_url=f"data:image/jpeg;base64,{base64_image}",
            ),
        ],
    ),
]
if use_mistral:
    # Send the prepared messages to Mistral and show the first choice's text.
    chat_response = client.chat.complete(messages=messages, model=model)
    print(chat_response.choices[0].message.content)
#### Gemini
from google import genai
from google.genai import types
# Gemini Developer API client (rebinds the module-level name `client`).
client = genai.Client(api_key=os.environ.get("API_KEY_GEMINI2"))

# Pick between the "flash" and the "pro" preview model.
flash = True
google_model = (
    'gemini-2.5-flash-preview-05-20' if flash else 'gemini-2.5-pro-preview-05-06'
)

# Step-by-step instructions sent to Gemini together with the board image.
chess_prompt = """Using this image of a chess board diagram. Black squares are coloured dark brown, white squares are light brown. A1 is at the bottom left of the image, H8 is at the top right. Complete the following tasks in order:
Task 1: Count the number of occupied and unoccupied squares in each row. e.g. {'occupied':3, 'unoccupied':5} => STRING
Task 2: Count the number of each piece type in each row. Check that they add up to the total number of pieces. => STRING
Task 3: In JSON format note the position of every piece by colour, type and then list of coordinates => JSON
Task 4: Convert JSON format to FEN string. {'board_fen': <FEN STRING>} => JSON"""
# To run this code you need to install the following dependencies:
# pip install google-genai
import base64
import os
from google import genai
from google.genai import types
def generate():
    """Stream Gemini's answer to ``chess_prompt`` about the board image.

    Reads the module-level ``base64_image``, ``chess_prompt`` and
    ``google_model``, builds a single user turn containing the JPEG and the
    prompt, and prints every streamed text chunk to stdout without adding
    newlines.

    Raises:
        RuntimeError: If ``base64_image`` is ``None``, i.e. ``encode_image``
            could not read the image file.
    """
    if base64_image is None:
        # Fail early with a clear message instead of sending an empty image.
        raise RuntimeError(
            f"Cannot query Gemini: image {image_path!r} could not be loaded."
        )

    # GEMINI_API_KEY is the variable this function has always read; fall back
    # to API_KEY_GEMINI2, the variable used for the module-level Gemini
    # client, so both clients accept the same configuration.
    gemini_key = (
        os.environ.get("GEMINI_API_KEY") or os.environ.get("API_KEY_GEMINI2")
    )
    client = genai.Client(
        api_key=gemini_key,
    )
    model = google_model  # e.g. "gemini-2.5-pro-preview-05-06"
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_bytes(
                    mime_type="image/jpeg",
                    # from_bytes takes the raw image bytes, but base64_image
                    # holds base64 *text*; decode it so the image is not
                    # encoded twice.
                    data=base64.b64decode(base64_image),
                ),
                types.Part.from_text(text=chess_prompt),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=0.15,
        response_mime_type="text/plain",
    )
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # A streamed chunk may carry no text; skip it rather than printing
        # the literal string "None".
        if chunk.text:
            print(chunk.text, end="")
# Run the Gemini request only when executed as a script and the use_gemini
# switch is on, mirroring how use_mistral gates the Mistral request.
if __name__ == "__main__" and use_gemini:
    generate()