# CoVT / app.py
import time

# os.environ["GRADIO_TEMP_DIR"] = (
#     "/home/agent_vision@BEIJAFLORE.COM/fmorel/CoVT-main/CoVT-main/gradio/temp"
# )
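# Note: if you uncomment the override above, you also need `import os`; the path is
# machine-specific, so point it at a writable temp directory on your own machine.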
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

# ================= Configuration Area =================
# You can change these defaults as you like
DEFAULT_MODEL_NAME = "Wakals/CoVT-7B-seg_depth_dino"
DEFAULT_CKPT_PATH = None  # Or set to your local checkpoint path
# ======================================================

# Global cache for the model and processor to avoid re-loading on every call
_cached_model = None
_cached_processor = None

def load_model_and_processor(
    model_name: str,
    ckpt: str = None,
):
    """
    Load a single CoVT-7B model and its corresponding processor.

    If `ckpt` is given, it takes precedence over `model_name`.
    """
    if ckpt is not None:
        print(f"Loading model from ckpt: {ckpt}")
        source = ckpt
    else:
        print(f"Loading model from hub: {model_name}")
        source = model_name
    # device_map="auto" places the model across available devices (requires accelerate)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        source, torch_dtype=torch.bfloat16, device_map="auto"
    ).eval()
    processor = AutoProcessor.from_pretrained(
        source, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28
    )
    return model, processor

def get_cached_model_and_processor(
    model_name: str = DEFAULT_MODEL_NAME,
    ckpt: str = DEFAULT_CKPT_PATH,
):
    """
    Lazy-load and cache the model and processor so they are not reloaded on every request.
    """
    global _cached_model, _cached_processor
    # If already loaded, just return them
    if _cached_model is not None and _cached_processor is not None:
        return _cached_model, _cached_processor
    # Otherwise load and cache
    _cached_model, _cached_processor = load_model_and_processor(
        model_name=model_name,
        ckpt=ckpt,
    )
    return _cached_model, _cached_processor

def run_single_inference(
    model,
    processor,
    image,  # can be either a PIL.Image or a path string
    question: str,
    max_new_tokens: int = 512,
    temperature: float = 0.0,
    top_p: float = 0.9,
    do_sample: bool = False,
    seed: int = 42,
):
    """
    Single inference: given one image and one question, return the answer and elapsed time.
    """
    # 1) Prepare the conversation
    # Gradio usually hands us a PIL image, but a path string is also supported for compatibility.
    if isinstance(image, str):
        pil_image = Image.open(image).convert("RGB")
        image_ref = image  # path for the "image" field
    elif isinstance(image, Image.Image):
        pil_image = image.convert("RGB")
        # With a PIL image, the chat template only needs a placeholder here;
        # the actual pixels are passed via the processor's `images` argument below.
        image_ref = "gradio_image"  # not a real path, just a placeholder
    else:
        raise ValueError("image must be a PIL.Image or a path string.")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image_ref},
                {"type": "text", "text": question},
            ],
        }
    ]
    # 2) Apply the chat template
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # 3) Encode image and text
    inputs = processor(text=[prompt], images=[pil_image], return_tensors="pt")
    # Move inputs to the same device as the model
    device = model.device
    inputs = {
        k: (v.to(device) if isinstance(v, torch.Tensor) else v)
        for k, v in inputs.items()
    }
    print(f"Running generation on device: {device.type}")
    # 4) Timing + generation
    if device.type == "cuda":
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    # Seed the RNG so sampling runs are reproducible (the seed has no effect
    # when do_sample is False, i.e. greedy decoding).
    torch.manual_seed(seed)
    start = time.time()
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=do_sample,
            pad_token_id=processor.tokenizer.eos_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
        )
    if device.type == "cuda":
        torch.cuda.synchronize()
    end = time.time()
    elapsed = end - start
    # 5) Decode only the newly generated tokens
    input_len = inputs["input_ids"].shape[1]
    new_tokens = generated_ids[0, input_len:]
    answer = processor.decode(new_tokens, skip_special_tokens=True)
    return answer, elapsed

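# A minimal sketch of calling the helper outside Gradio (the image path and
# question below are made up for illustration):
#
#     model, processor = get_cached_model_and_processor()
#     answer, secs = run_single_inference(
#         model, processor, "example.jpg", "What is in this image?"
#     )
#     print(f"{answer} ({secs:.2f}s)")
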
def gradio_inference(
    image,
    question,
    max_new_tokens,
    temperature,
    top_p,
    seed,
):
    """
    Wrapper for Gradio that calls the inference logic and returns the answer + time cost.
    """
    if image is None:
        return "Please upload an image.", 0.0
    # Get (or load) the model and processor
    model, processor = get_cached_model_and_processor()
    # Run inference
    answer, elapsed = run_single_inference(
        model=model,
        processor=processor,
        image=image,  # PIL image from the gr.Image component (type="pil")
        question=question,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=(temperature > 0.0),  # sample only when temperature > 0
        seed=int(seed),
    )
    return answer, elapsed

# ===================== Gradio UI =====================
def build_demo():
    with gr.Blocks() as demo:
        gr.Markdown(
            "# CoVT-7B Gradio Demo\n"
            "Upload an image and enter a question to run visual question answering."
        )
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(label="Input Image", type="pil")
                question_input = gr.Textbox(label="Question", value="", lines=2)
                max_new_tokens = gr.Slider(
                    label="max_new_tokens", minimum=1, maximum=1024, value=512, step=1
                )
                temperature = gr.Slider(
                    label="temperature", minimum=0.0, maximum=1.0, value=0.0, step=0.01
                )
                top_p = gr.Slider(
                    label="top_p", minimum=0.1, maximum=1.0, value=0.9, step=0.01
                )
                seed = gr.Slider(
                    label="random_seed", minimum=0, maximum=1000, value=42, step=1
                )
                run_button = gr.Button("Run Inference")
            with gr.Column():
                answer_output = gr.Textbox(label="Answer", lines=10)
                elapsed_output = gr.Number(label="Elapsed time (seconds)")
        run_button.click(
            fn=gradio_inference,
            inputs=[
                image_input,
                question_input,
                max_new_tokens,
                temperature,
                top_p,
                seed,
            ],
            outputs=[answer_output, elapsed_output],
        )
    return demo

if __name__ == "__main__":
    demo = build_demo()
    # You can set share=True if you want a public link
    demo.launch()
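    # A few optional launch settings (not used in the call above, just a sketch of
    # common alternatives): bind to all interfaces and a fixed port, e.g.
    #     demo.launch(server_name="0.0.0.0", server_port=7860)
    # You can also call get_cached_model_and_processor() once before launching to
    # pre-load the model so the first request does not pay the loading cost.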