Spaces:
Runtime error
Runtime error
| import base64 | |
| from langchain_core.tools import tool | |
| from langchain_core.messages import HumanMessage | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from dotenv import load_dotenv | |
# Load variables from a local .env file before the client below is built.
# NOTE(review): presumably this is where the Google API key comes from — confirm.
load_dotenv()

# Module-level chat model client; analyze_image sends its request through it.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
)
def analyze_image(img_path: str, question: str) -> str:
    """Answer a question about an image using the multimodal chat model.

    The image is read from disk, base64-encoded, and sent to the module-level
    ``llm`` as a data URL together with the question.

    Args:
        img_path: Path to the image file on disk.
        question: Question to ask the model about the image.

    Returns:
        The model's answer with surrounding whitespace stripped, or an empty
        string if anything fails (the error is printed rather than raised).
    """
    import mimetypes  # function-scope import keeps this block self-contained

    try:
        # Read the image and encode it as base64 for embedding in a data URL.
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual image type instead of
        # always claiming PNG; fall back to PNG when the type is unknown.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Analyze the image and answer the following question: "
                            + question
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model with the prepared message list.
        response = llm.invoke(message)

        # Message content may be a plain string or a list of content parts
        # (strings or {"type": "text", "text": ...} dicts); flatten the latter
        # so a list-shaped reply is not lost to a TypeError.
        content = response.content
        if not isinstance(content, str):
            text_parts = []
            for part in content:
                if isinstance(part, str):
                    text_parts.append(part)
                elif isinstance(part, dict) and part.get("type") == "text":
                    text_parts.append(part.get("text", ""))
            content = "".join(text_parts)

        return content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return an
        # empty answer so callers never have to handle an exception.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
if __name__ == "__main__":
    # Example usage: ask the model about a local chess-position screenshot.
    img_path = r"C:\Users\pkduo\OneDrive\Máy tính\HF Agent Course Final\Final_Assignment_Template\Screenshot 2025-05-02 144021.png"
    question = "Review the chess position provided in the image. It is white's turn. Provide the correct next move for white which guarantees a win. Please provide your response in algebraic notation.?"
    # A plain function has no `.invoke()`, so calling it unconditionally raises
    # AttributeError. Use the tool-style dictionary call only when the object
    # actually exposes `.invoke`; otherwise call the function directly.
    if hasattr(analyze_image, "invoke"):
        result = analyze_image.invoke({"img_path": img_path, "question": question})
    else:
        result = analyze_image(img_path, question)
    print(result)