samwell
/

cosmos-predict2-endpoint

Model card Files Files and versions

samwell committed 28 days ago

Commit

a44999a

·

verified ·

1 Parent(s): fda3463

Add handler.py

Files changed (1) hide show

handler.py +71 -0

handler.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import torch
+import base64
+import io
+from typing import Dict, Any
+from PIL import Image
+class EndpointHandler:
+    def __init__(self, path: str = ""):
+        from diffusers import Cosmos2VideoToWorldPipeline
+        from diffusers.utils import export_to_video
+        self.export_to_video = export_to_video
+        model_id = "nvidia/Cosmos-Predict2-2B-Video2World"
+        self.pipe = Cosmos2VideoToWorldPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
+        )
+        self.pipe.to("cuda")
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        inputs = data.get("inputs", data)
+        image_data = inputs.get("image")
+        if not image_data:
+            return {"error": "No image provided"}
+        try:
+            if image_data.startswith("http"):
+                from diffusers.utils import load_image
+                image = load_image(image_data)
+            else:
+                image_bytes = base64.b64decode(image_data)
+                image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        except Exception as e:
+            return {"error": f"Failed to load image: {str(e)}"}
+        prompt = inputs.get("prompt", "")
+        if not prompt:
+            return {"error": "No prompt provided"}
+        negative_prompt = inputs.get("negative_prompt", "ugly, static, blurry, low quality")
+        num_frames = inputs.get("num_frames", 93)
+        num_inference_steps = inputs.get("num_inference_steps", 35)
+        guidance_scale = inputs.get("guidance_scale", 7.0)
+        seed = inputs.get("seed")
+        generator = torch.Generator(device="cuda").manual_seed(int(seed)) if seed else None
+        try:
+            output = self.pipe(
+                image=image,
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                generator=generator,
+            )
+            video_path = "/tmp/output.mp4"
+            self.export_to_video(output.frames[0], video_path, fps=16)
+            with open(video_path, "rb") as f:
+                video_b64 = base64.b64encode(f.read()).decode("utf-8")
+            return {"video": video_b64, "content_type": "video/mp4"}
+        except Exception as e:
+            return {"error": f"Inference failed: {str(e)}"}