Spaces:

NandiniLokeshReddy
/

QwenBaseModel

Build error

App Files Files Community

QwenBaseModel / app.py

NandiniLokeshReddy

Update app.py

761cff5 verified over 1 year ago

raw

history blame contribute delete

1.98 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from PIL import Image
	import warnings

	# Suppress warnings
	warnings.filterwarnings('ignore')

	# Prefer CUDA, but fall back to CPU so the app can still start on hardware
	# that has no GPU instead of aborting during model loading.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	torch.set_default_device(device)

	# Half precision is kept on the GPU. On CPU float32 is used instead, since
	# float16 inference on CPU is slow and not supported by every operator.
	model_dtype = torch.float16 if device == 'cuda' else torch.float32

	# Load the model and tokenizer
	# NOTE(review): trust_remote_code=True executes Python code shipped in the
	# model repository; consider pinning a `revision` once a known-good commit
	# has been audited.
	model_name = 'qnguyen3/nanoLLaVA-1.5'
	try:
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        torch_dtype=model_dtype,
	        device_map='auto',
	        trust_remote_code=True,
	    )
	    tokenizer = AutoTokenizer.from_pretrained(
	        model_name,
	        trust_remote_code=True,
	    )
	except ImportError:
	    print("Error: Missing required dependencies. Make sure flash_attn is installed.")
	    # Bare raise re-raises the active exception with its original traceback.
	    raise

	# Function to describe the uploaded image
	def describe_image(image, prompt="Describe this image in detail"):
	    """Generate a text description of ``image`` guided by ``prompt``.

	    Args:
	        image: The picture to describe, or ``None`` when nothing was
	            uploaded. It is handed unchanged to ``model.process_images``.
	        prompt: Instruction placed after the image placeholder in the user
	            message. A missing or blank prompt falls back to the default
	            instruction.

	    Returns:
	        The decoded model output with the prompt tokens and special tokens
	        stripped, or a short notice asking for an image when none was given.
	    """
	    # Nothing was uploaded: answer with a readable notice in the text output
	    # rather than failing inside the model's image preprocessing.
	    if image is None:
	        return "Please upload an image to describe."

	    # A cleared textbox would otherwise send an instruction-less message.
	    if not prompt or not prompt.strip():
	        prompt = "Describe this image in detail"

	    messages = [{"role": "user", "content": f'<image>\n{prompt}'}]
	    chat_text = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Split only at the FIRST placeholder. If the user's prompt itself
	    # contains "<image>", everything after it (including the generation
	    # prompt appended by the chat template) must stay part of the input.
	    before_image, _, after_image = chat_text.partition('<image>')

	    # -200 marks where the image goes in the token sequence; presumably the
	    # model's remote code swaps it for the image features (value taken from
	    # the original code - confirm against the model card).
	    image_token_index = -200
	    token_ids = (
	        tokenizer(before_image).input_ids
	        + [image_token_index]
	        + tokenizer(after_image).input_ids
	    )
	    # Place the inputs on the model's device explicitly so the function does
	    # not depend on the process-wide default device.
	    input_ids = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(model.device)

	    # Process the image into the tensor the model expects.
	    image_tensor = model.process_images([image], model.config).to(
	        device=model.device,
	        dtype=model.dtype,
	    )

	    # Generate a response; [0] selects the single sequence in the batch.
	    output_ids = model.generate(
	        input_ids,
	        images=image_tensor,
	        max_new_tokens=2048,
	        use_cache=True,
	    )[0]

	    # Drop the leading prompt-length slice so only the answer is decoded.
	    answer_ids = output_ids[input_ids.shape[1]:]
	    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

	# Set up the Gradio interface.
	# Components come from the top-level namespace and use ``value=`` for the
	# initial text: the legacy ``gr.inputs.*`` classes and their ``default=``
	# keyword were removed in Gradio 4.
	gr.Interface(
	    fn=describe_image,
	    inputs=[
	        gr.Image(type="pil"),
	        gr.Textbox(value="Describe this image in detail"),
	    ],
	    outputs="text",
	    title="Image Description Model",
	    description="Upload an image and receive a detailed description.",
	).launch()