intern_assignment / gemini_vision.py
Battlecon's picture
Initial clean deployment commit
05cb41b
Raw
History Blame Contribute Delete
950 Bytes
import os
from google import genai
from google.genai import types
from PIL import Image
def extract_text_gemini(image_path: str) -> str:
    """Send an image to Gemini Vision and return the extracted text/data.

    Requires the GEMINI_API_KEY environment variable to be set; the client
    is created without arguments and picks the key up from the environment.

    Args:
        image_path: Path of the image file to open and send to the model.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        Nothing is caught here: errors from opening the image or from the
        API call propagate to the caller unchanged.
    """
    # Initialize the client (automatically picks up GEMINI_API_KEY from env).
    client = genai.Client()

    prompt = (
        "Extract all text, handwriting, and tabular data from this medical "
        "document accurately."
    )

    # Open the image in a context manager so the underlying file handle is
    # released even if the request fails. The request is issued inside the
    # block because PIL loads pixel data lazily and needs the file open.
    with Image.open(image_path) as image:
        # gemini-2.5-flash is the model chosen for fast, multimodal tasks.
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt, image],
        )

    # NOTE(review): the SDK may report `text` as None when the response has
    # no text part -- confirm whether callers need a fallback.
    return response.text
# Example usage. Guarded so that importing this module to reuse
# extract_text_gemini does not trigger an API call and a print as a side
# effect; running the file as a script behaves as before.
if __name__ == "__main__":
    # Fail fast with a KeyError when the key is missing. The value itself is
    # not passed on: the client reads the key from the environment.
    api_key = os.environ["GEMINI_API_KEY"]
    text_output = extract_text_gemini("handwriting.png")
    print(text_output)