Spaces:

breadlicker45
/

PaliGemma2

Sleeping

PaliGemma2 / app.py

Create app.py

c91d9f3 verified 12 months ago

1.68 kB

	import gradio as gr
	from transformers import AutoProcessor, AutoModelForImageTextToText
	from PIL import Image
	import torch
	import os
	import spaces # Import the spaces module


	# Holds the (processor, model) pair after the first successful load so that
	# later calls reuse it instead of reading the checkpoint again.
	_LOADED = {}


	def load_model():
	    """Load the PaliGemma2 processor and model with a Hugging Face token.

	    The token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
	    variable. The first successful call stores the loaded pair in a
	    module-level cache; subsequent calls return that same pair.

	    Returns:
	        tuple: ``(processor, model)`` for ``google/paligemma2-3b-pt-224``.

	    Raises:
	        ValueError: If the environment variable is unset or empty and
	            nothing has been loaded yet.
	    """
	    if "pair" in _LOADED:
	        return _LOADED["pair"]

	    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
	    if not token:
	        raise ValueError(
	            "Hugging Face API token not found. Please set it in the environment variables."
	        )

	    model_id = "google/paligemma2-3b-pt-224"

	    # `token=` is the current keyword; `use_auth_token=` is deprecated in
	    # transformers.
	    processor = AutoProcessor.from_pretrained(model_id, token=token)
	    model = AutoModelForImageTextToText.from_pretrained(model_id, token=token)

	    _LOADED["pair"] = (processor, model)
	    return _LOADED["pair"]


	@spaces.GPU  # Decorate the function that uses the GPU
	def process_image(image):
	    """Run PaliGemma2 on an uploaded image and return the generated text.

	    Args:
	        image: The image handed over by the Gradio input component
	            (configured with ``type="pil"`` in the interface).

	    Returns:
	        str: The decoded text generated for the first (only) batch item,
	        with the prompt tokens removed.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    if image is None:
	        # Submitting without an upload passes None; report it in the UI
	        # instead of failing inside the processor.
	        raise gr.Error("Please upload an image before submitting.")

	    processor, model = load_model()

	    # The function is GPU-decorated, so put the model and inputs on the GPU
	    # when one is visible; fall back to CPU otherwise.
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    model = model.to(device)

	    # NOTE(review): no text prompt is passed, so the processor presumably
	    # treats this as plain captioning rather than OCR -- confirm whether a
	    # task prefix should be supplied for text extraction.
	    inputs = processor(images=image, return_tensors="pt").to(device)
	    prompt_len = inputs["input_ids"].shape[-1]

	    with torch.no_grad():
	        # An explicit budget for new tokens keeps generation from depending
	        # on a default total-length limit that the image prompt may exceed.
	        generated_ids = model.generate(**inputs, max_new_tokens=128)

	    # `generate` returns prompt + continuation; decode only the continuation.
	    new_tokens = generated_ids[:, prompt_len:]
	    text = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

	    return text


	if __name__ == "__main__":
	    # Build the input and output widgets first, then wire them to the handler.
	    image_input = gr.Image(type="pil", label="Upload an image containing text")
	    text_output = gr.Textbox(label="Extracted Text")

	    demo = gr.Interface(
	        fn=process_image,
	        inputs=image_input,
	        outputs=text_output,
	        title="Text Reading from Images using PaliGemma2",
	        description="Upload an image containing text and the model will extract the text.",
	    )

	    # Start the Gradio server when the file is run as a script.
	    demo.launch()