Spaces:

Battlecon
/

intern_assignment

Sleeping

intern_assignment / gemini_vision.py

Initial clean deployment commit

05cb41b 26 days ago

950 Bytes

	import os
	from google import genai
	from google.genai import types
	from PIL import Image

	def extract_text_gemini(image_path: str) -> str:
	    """Send an image to Gemini Vision and return the text it extracts.

	    Requires the GEMINI_API_KEY environment variable to be set.

	    Args:
	        image_path: Path of the image file, opened with PIL.

	    Returns:
	        The text of the model's response, or an empty string when the
	        response carries no text.

	    Errors raised while opening the image or calling the API are not
	    caught here and propagate to the caller.
	    """
	    # Initialize the client (automatically picks up GEMINI_API_KEY from env)
	    client = genai.Client()

	    # Define the prompt
	    prompt = "Extract all text, handwriting, and tabular data from this medical document accurately."

	    # Keep the image file open only for the duration of the request so the
	    # underlying file handle is released even if the API call raises.
	    with Image.open(image_path) as image:
	        # Call the model (gemini-2.5-flash is currently recommended for fast, multimodal tasks)
	        response = client.models.generate_content(
	            model="gemini-2.5-flash",
	            contents=[prompt, image],
	        )

	    # NOTE(review): the SDK types `response.text` as optional (no text parts,
	    # e.g. a blocked response) - fall back to "" to honour the `-> str`
	    # contract; confirm callers do not need to distinguish that case.
	    return response.text or ""

	# Example usage. Guarded so that importing this module to reuse
	# extract_text_gemini() does not read a file, call the API, or print.
	if __name__ == "__main__":
	    # The key is not passed on explicitly (the client reads it from the
	    # environment); the lookup is kept so a missing GEMINI_API_KEY fails
	    # fast with KeyError before any request is attempted.
	    api_key = os.environ["GEMINI_API_KEY"]
	    text_output = extract_text_gemini("handwriting.png")
	    print(text_output)