Spaces:

WaysAheadGlobal
/

Blip

Build error

App Files Files Community

Blip / app.py

WaysAheadGlobal

Update app.py

ef81d40 verified 6 months ago

raw

history blame contribute delete

1.86 kB

	# app.py

	import gradio as gr
	from tinyllava.model.builder import load_pretrained_model
	from tinyllava.utils import disable_torch_init
	from tinyllava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path
	import torch
	from PIL import Image

	# --- Disable unnecessary torch init ---
	disable_torch_init()

	# --- Load TinyLLaVA 3.1B ---
	model_path = "bczhou/TinyLLaVA-3.1B"  # official HF ID

	# The target device is fixed up front so the loader can place the weights
	# there directly instead of loading with its defaults and moving afterwards.
	device = torch.device("cpu")

	# NOTE(review): the `device` keyword follows the LLaVA-style builder
	# signature, whose default appears to be "cuda" -- confirm against
	# tinyllava.model.builder before deploying.
	tokenizer, model, image_processor, context_len = load_pretrained_model(
	    model_path=model_path,
	    model_base=None,  # If you have a base model, point it here; else leave as is
	    model_name="TinyLLaVA-3.1B",
	    device=device.type,
	)

	# Cast to float32 as well as moving: the builder presumably loads
	# half-precision weights (TODO confirm), and many CPU kernels are missing or
	# very slow for float16.
	model.to(device=device, dtype=torch.float32)

	# --- Gradio handler ---
	def describe_image(image, prompt):
	    """Answer a question about an uploaded image with TinyLLaVA.

	    Args:
	        image: The picture as a NumPy array (what ``gr.Image(type="numpy")``
	            delivers), or ``None`` when nothing was uploaded.
	        prompt: The user's question about the image.

	    Returns:
	        The decoded model answer, or a short hint when the image or the
	        question is missing.
	    """
	    # Gradio passes None for an empty image component; fail softly instead of
	    # crashing inside Image.fromarray.
	    if image is None:
	        return "Please upload an image first."
	    if prompt is None or not prompt.strip():
	        return "Please enter a question about the image."

	    # TinyLLaVA wants PIL; force RGB so RGBA / grayscale uploads are accepted.
	    pil_image = Image.fromarray(image).convert("RGB")
	    image_tensor = process_images([pil_image], image_processor, model.config)
	    # Match the model's device and dtype so the vision tower does not see
	    # mixed-precision inputs.
	    image_tensor = image_tensor.to(device=device, dtype=model.dtype)

	    # tokenizer_image_token splits the text on the "<image>" placeholder and
	    # splices in the image token id, so the placeholder must be present.
	    # NOTE(review): the upstream examples also wrap the question in a
	    # conversation template -- consider adding that for better answers.
	    text = prompt.strip()
	    if "<image>" not in text:
	        text = "<image>\n" + text

	    # The helper already returns token ids (1-D with return_tensors="pt"), so
	    # it must not be run through the tokenizer again; add the batch axis once.
	    input_ids = tokenizer_image_token(text, tokenizer, return_tensors="pt")
	    input_ids = input_ids.unsqueeze(0).to(device)

	    with torch.no_grad():
	        output_ids = model.generate(
	            input_ids,
	            images=image_tensor,
	            do_sample=True,
	            temperature=0.2,
	            max_new_tokens=200,
	        )

	    # Some versions of generate() echo the prompt ids (including the negative
	    # image placeholder id, which cannot be decoded) before the answer; drop
	    # that prefix only when it is really there.
	    sequence = output_ids[0]
	    prompt_len = input_ids.shape[1]
	    if sequence.shape[0] > prompt_len and torch.equal(sequence[:prompt_len], input_ids[0]):
	        sequence = sequence[prompt_len:]

	    out_text = tokenizer.decode(sequence, skip_special_tokens=True)
	    return out_text.strip()

	# --- Gradio UI ---
	# Components are built first and named so the Interface call reads as wiring.
	image_input = gr.Image(type="numpy", label="Image")
	question_input = gr.Textbox(
	    label="Your question",
	    placeholder="What's happening in this image?",
	)
	answer_output = gr.Textbox(label="TinyLLaVA Answer")

	iface = gr.Interface(
	    fn=describe_image,
	    inputs=[image_input, question_input],
	    outputs=answer_output,
	    title="🦙 TinyLLaVA-3.1B — Vision-Language Q&A",
	    description="A lightweight LLaVA variant that runs on CPU Spaces. Upload an image, ask a question.",
	)

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()