File size: 950 Bytes
05cb41b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
from google import genai
from google.genai import types
from PIL import Image

def extract_text_gemini(image_path: str) -> str:
    """Extract the text content of an image using Gemini Vision.

    Opens the image at *image_path* and sends it, together with a fixed
    extraction prompt for medical documents, to the ``gemini-2.5-flash``
    model. Requires the GEMINI_API_KEY environment variable to be set.

    Args:
        image_path: Path to the image file to send to the model.

    Returns:
        The text of the model response, or an empty string when the
        response carries no text.

    Raises:
        OSError: If the image file cannot be opened or identified.
    """
    # Initialize the client (automatically picks up GEMINI_API_KEY from env)
    client = genai.Client()

    prompt = "Extract all text, handwriting, and tabular data from this medical document accurately."

    # Image.open keeps the underlying file open until the image is closed, so
    # the request is made inside the context manager and the handle is
    # released as soon as the call returns (or raises).
    with Image.open(image_path) as image:
        # gemini-2.5-flash is currently recommended for fast, multimodal tasks
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt, image],
        )

    # response.text may be None when the response has no text part; honor the
    # declared ``str`` return type instead of leaking None to callers.
    return response.text or ""

# Example usage: runs only when the file is executed as a script, so importing
# this module does not read a file or call the Gemini API as a side effect.
if __name__ == "__main__":
    # Fail fast with a readable message before any API call is attempted.
    if "GEMINI_API_KEY" not in os.environ:
        raise SystemExit("GEMINI_API_KEY environment variable is not set.")
    print(extract_text_gemini("handwriting.png"))