import os

from google import genai
from google.genai import types
from PIL import Image


def extract_text_gemini(image_path: str) -> str:
    """Send an image to Gemini Vision and return the text it extracts.

    Requires the GEMINI_API_KEY environment variable to be set; the client
    picks the key up from the environment on its own.

    Args:
        image_path: Path to the image file to analyse.

    Returns:
        The ``text`` attribute of the model response.
        NOTE(review): this may be ``None`` rather than a ``str`` when the
        response contains no text parts -- confirm against the SDK.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist (raised by
            ``Image.open``).
    """
    # Initialize the client (automatically picks up GEMINI_API_KEY from env).
    client = genai.Client()

    prompt = (
        "Extract all text, handwriting, and tabular data from this medical "
        "document accurately."
    )

    # Pillow opens images lazily, so the request is issued while the file is
    # still open; the context manager then releases the file handle even if
    # the API call raises.
    with Image.open(image_path) as image:
        # gemini-2.5-flash is used here for fast, multimodal tasks.
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt, image],
        )

    return response.text


def main() -> None:
    """Run the example: extract the text from ``handwriting.png`` and print it."""
    # Fail fast with a KeyError when the key is missing, before touching the
    # image or the network. The value itself is intentionally unused here:
    # genai.Client() reads it from the environment.
    api_key = os.environ["GEMINI_API_KEY"]

    text_output = extract_text_gemini("handwriting.png")
    print(text_output)


# Guarded so that importing this module does not trigger an API call.
if __name__ == "__main__":
    main()