# PaliGemma2 Hugging Face Space — app.py
# (originally published by breadlicker45; commit 33262af)
import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import torch
import os
import spaces # Import the spaces module
def load_model():
    """Load (once) and cache the PaliGemma2 processor and model.

    The authentication token is read from the HUGGINGFACEHUB_API_TOKEN
    environment variable. The processor/model pair is cached on the
    function object so repeated calls (one per Gradio request) do not
    re-download and re-instantiate the 28B-parameter model.

    Returns:
        tuple: (processor, model) ready for inference.

    Raises:
        ValueError: if the Hugging Face API token is not set.
    """
    # Reuse the already-loaded pair if a previous call succeeded.
    cached = getattr(load_model, "_cache", None)
    if cached is not None:
        return cached
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve token from environment variable
    if not token:
        raise ValueError(
            "Hugging Face API token not found. Please set it in the environment variables."
        )
    # `token=` replaces the deprecated `use_auth_token=` keyword.
    processor = AutoProcessor.from_pretrained(
        "google/paligemma2-28b-pt-896", token=token
    )
    model = AutoModelForImageTextToText.from_pretrained(
        "google/paligemma2-28b-pt-896", token=token, torch_dtype=torch.bfloat16
    )
    # Move model to GPU if available.
    if torch.cuda.is_available():
        model = model.to("cuda")
    load_model._cache = (processor, model)
    return load_model._cache
@spaces.GPU  # Runs on GPU when the Space allocates one
def process_image_and_text(image, text_input):
    """Generate text from an image and a text prompt with PaliGemma2.

    Args:
        image: PIL image supplied by the Gradio `Image` component.
        text_input: prompt string supplied by the Gradio `Textbox`.

    Returns:
        str: the decoded model output for the first (only) batch item.
    """
    processor, model = load_model()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Encode both modalities; floating-point tensors are cast to bfloat16
    # to match the model weights (integer token ids are left untouched).
    model_inputs = processor(text=text_input, images=image, return_tensors="pt")
    model_inputs = model_inputs.to(device, dtype=torch.bfloat16)
    # Inference only — no gradients needed.
    with torch.no_grad():
        output_ids = model.generate(**model_inputs, max_new_tokens=100)
    decoded = processor.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
if __name__ == "__main__":
    # Wire the model function into a simple two-input, one-output UI.
    demo = gr.Interface(
        fn=process_image_and_text,
        inputs=[
            gr.Image(type="pil", label="Upload an image containing text"),
            gr.Textbox(label="Enter Text Prompt"),
        ],
        outputs=gr.Textbox(label="Extracted/Generated Text"),
        title="Text Reading/Generation with PaliGemma2",
        description="Upload an image and enter a text prompt. The model will generate text based on both.",
    )
    demo.launch()