import base64
import io
import os
import tempfile
from typing import Any, Dict

import torch
from PIL import Image
class EndpointHandler:
    """Inference-endpoint handler for NVIDIA Cosmos-Predict2 video-to-world.

    Takes a single conditioning image plus a text prompt and returns a short
    generated video clip as base64-encoded MP4.
    """

    def __init__(self, path: str = ""):
        # Lazy import: keeps this module importable on machines without
        # diffusers (the serving framework imports the handler before use).
        from diffusers import Cosmos2VideoToWorldPipeline
        from diffusers.utils import export_to_video

        self.export_to_video = export_to_video
        model_id = "nvidia/Cosmos-Predict2-2B-Video2World"
        self.pipe = Cosmos2VideoToWorldPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
        )
        self.pipe.to("cuda")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run one generation request.

        Payload fields (top-level or nested under ``"inputs"``):
            image: URL, raw base64, or ``data:`` URI of the conditioning frame.
            prompt: required text prompt.
            negative_prompt / num_frames / num_inference_steps /
            guidance_scale / seed: optional generation controls.

        Returns ``{"video": <base64 mp4>, "content_type": "video/mp4"}`` on
        success, or ``{"error": <message>}`` on failure (never raises).
        """
        inputs = data.get("inputs", data)

        # Validate cheap required fields BEFORE any image decode/download:
        # the original fetched the image first and only then discovered a
        # missing prompt, wasting the network round-trip.
        image_data = inputs.get("image")
        if not image_data:
            return {"error": "No image provided"}
        prompt = inputs.get("prompt", "")
        if not prompt:
            return {"error": "No prompt provided"}

        try:
            if image_data.startswith("http"):
                from diffusers.utils import load_image
                image = load_image(image_data)
            else:
                # Accept both raw base64 and data URIs
                # ("data:image/png;base64,...."): strip the URI header.
                if image_data.startswith("data:"):
                    image_data = image_data.split(",", 1)[-1]
                image_bytes = base64.b64decode(image_data)
                image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        except Exception as e:
            return {"error": f"Failed to load image: {str(e)}"}

        negative_prompt = inputs.get("negative_prompt", "ugly, static, blurry, low quality")
        # Coerce numeric fields: JSON clients frequently send them as strings.
        num_frames = int(inputs.get("num_frames", 93))
        num_inference_steps = int(inputs.get("num_inference_steps", 35))
        guidance_scale = float(inputs.get("guidance_scale", 7.0))
        seed = inputs.get("seed")
        # BUG FIX: the original used `if seed`, which silently discarded a
        # legitimate seed of 0; test explicitly against None.
        generator = (
            torch.Generator(device="cuda").manual_seed(int(seed))
            if seed is not None
            else None
        )

        try:
            output = self.pipe(
                image=image,
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            )
            # Unique temp file per request: the original's fixed
            # /tmp/output.mp4 raced under concurrent invocations and was
            # never deleted.
            fd, video_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)
            try:
                self.export_to_video(output.frames[0], video_path, fps=16)
                with open(video_path, "rb") as f:
                    video_b64 = base64.b64encode(f.read()).decode("utf-8")
            finally:
                os.remove(video_path)
            return {"video": video_b64, "content_type": "video/mp4"}
        except Exception as e:
            return {"error": f"Inference failed: {str(e)}"}