Spaces:

samwell
/

cosmos-predict2-space

Paused

samwell committed on Jan 17

Commit

0c995b9

verified ·

1 Parent(s): 0e4048f

Remove generator entirely to fix CUDA/CPU device mismatch

Files changed (1) hide show

handler.py CHANGED Viewed

@@ -56,17 +56,19 @@ async def predict(request: dict):
     if not prompt:
         raise HTTPException(status_code=400, detail="No prompt provided")
-    # Load image
     try:
         if image_data.startswith("http"):
-            from diffusers.utils import load_image
             image = load_image(image_data)
         else:
             image_bytes = base64.b64decode(image_data)
             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
         # Resize to expected dimensions for Cosmos Video2World (720P model)
-        image = image.resize((1280, 704))
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Failed to load image: {str(e)}")
@@ -75,12 +77,9 @@ async def predict(request: dict):
     num_frames = inputs.get("num_frames", 93)
     num_inference_steps = inputs.get("num_inference_steps", 35)
     guidance_scale = inputs.get("guidance_scale", 7.0)
-    seed = inputs.get("seed", 42)
-    # Generator WITHOUT device specification (let diffusers handle it)
-    generator = torch.Generator().manual_seed(int(seed))
     try:
         output = pipe(
             image=image,
             prompt=prompt,
@@ -88,7 +87,6 @@ async def predict(request: dict):
             num_frames=num_frames,
             num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
-            generator=generator,
         )
         video_path = "/tmp/output.mp4"

     if not prompt:
         raise HTTPException(status_code=400, detail="No prompt provided")
+    # Load image using diffusers' load_image for consistent preprocessing
     try:
+        from diffusers.utils import load_image
         if image_data.startswith("http"):
             image = load_image(image_data)
         else:
+            # Decode the base64 payload in memory and open it with PIL (load_image is only used for URLs)
             image_bytes = base64.b64decode(image_data)
             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
         # Resize to expected dimensions for Cosmos Video2World (720P model)
+        image = image.resize((1280, 704), Image.Resampling.LANCZOS)
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Failed to load image: {str(e)}")
     num_frames = inputs.get("num_frames", 93)
     num_inference_steps = inputs.get("num_inference_steps", 35)
     guidance_scale = inputs.get("guidance_scale", 7.0)
     try:
+        # Run inference WITHOUT generator to avoid device mismatch
         output = pipe(
             image=image,
             prompt=prompt,
             num_frames=num_frames,
             num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
         )
         video_path = "/tmp/output.mp4"