Explicitly move all pipeline components to CUDA, use inference_mode
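In short: the pipeline and each of its sub-modules are moved onto one CUDA device at startup, and the generation call is wrapped in torch.inference_mode(). A minimal standalone sketch of that pattern (the helper names are illustrative, not part of handler.py; it assumes a diffusers-style pipeline exposing text_encoder, vae and transformer, as the diff below does):

import torch

DEVICE = "cuda"

def move_pipeline_to_device(pipe, device: str = DEVICE):
    # Illustrative helper: move the pipeline object and the sub-modules it
    # exposes onto a single device, mirroring what load_model() does below.
    pipe = pipe.to(device)
    for name in ("text_encoder", "vae", "transformer"):
        module = getattr(pipe, name, None)
        if module is not None:
            setattr(pipe, name, module.to(device))
    return pipe

def run_pipeline(pipe, **kwargs):
    # torch.inference_mode() disables autograd like no_grad(), and additionally
    # skips version-counter/view tracking, so it is the cheaper mode for serving.
    with torch.inference_mode():
        return pipe(**kwargs)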
handler.py  +26 -16
@@ -12,6 +12,7 @@ app = FastAPI()
 # Global pipeline
 pipe = None
 export_to_video = None
+DEVICE = "cuda"
 
 class InferenceRequest(BaseModel):
     image: str  # base64 or URL
@@ -24,7 +25,7 @@ class InferenceRequest(BaseModel):
 
 @app.on_event("startup")
 async def load_model():
-    global pipe, export_to_video
+    global pipe, export_to_video, DEVICE
     from diffusers import Cosmos2VideoToWorldPipeline
     from diffusers.utils import export_to_video as etv
 
@@ -37,13 +38,23 @@ async def load_model():
         torch_dtype=torch.bfloat16,
         token=os.environ.get("HF_TOKEN"),
     )
-    pipe.to(
+    pipe = pipe.to(DEVICE)
+
+    # Ensure all components are on the same device
+    if hasattr(pipe, 'text_encoder') and pipe.text_encoder is not None:
+        pipe.text_encoder = pipe.text_encoder.to(DEVICE)
+    if hasattr(pipe, 'vae') and pipe.vae is not None:
+        pipe.vae = pipe.vae.to(DEVICE)
+    if hasattr(pipe, 'transformer') and pipe.transformer is not None:
+        pipe.transformer = pipe.transformer.to(DEVICE)
+
     print("Model loaded successfully!")
+    print(f"Pipeline device: {pipe.device}")
 
 @app.post("/predict")
 @app.post("/")
 async def predict(request: dict):
-    global pipe, export_to_video
+    global pipe, export_to_video, DEVICE
 
     # Handle both direct and nested input formats
     inputs = request.get("inputs", request)
@@ -56,14 +67,12 @@ async def predict(request: dict):
     if not prompt:
         raise HTTPException(status_code=400, detail="No prompt provided")
 
-    # Load image
+    # Load image
     try:
-        from diffusers.utils import load_image
-
         if image_data.startswith("http"):
+            from diffusers.utils import load_image
             image = load_image(image_data)
         else:
-            # Save base64 to temp file and load with load_image for consistent handling
             image_bytes = base64.b64decode(image_data)
             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
@@ -79,15 +88,16 @@ async def predict(request: dict):
     guidance_scale = inputs.get("guidance_scale", 7.0)
 
     try:
-        # Run
+        # Run without generator - let pipeline handle device placement
+        with torch.inference_mode():
+            output = pipe(
+                image=image,
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+            )
 
         video_path = "/tmp/output.mp4"
         export_to_video(output.frames[0], video_path, fps=16)
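For reference, a hypothetical client call against this handler (the host, port, image path, prompt and parameter values are assumptions; the routes, the optional "inputs" wrapper and the field names come from the code above):

import base64
import requests  # assumes the requests package on the client side

# Encode a local frame as base64; an http(s) URL would also be accepted.
with open("first_frame.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": {  # the handler also accepts the same fields without the "inputs" wrapper
        "image": image_b64,
        "prompt": "A robot arm slowly picks up a red cube",
        "guidance_scale": 7.0,  # example value; the server default is 7.0
    }
}

resp = requests.post("http://localhost:8080/predict", json=payload, timeout=600)
resp.raise_for_status()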