jbilcke-hf
/

HunyuanVideoGP-HFIE

Text-to-Video

Safetensors

Model card Files and versions

xet

Community

jbilcke-hf committed on Dec 16, 2024

Commit

de858d1

verified ·

1 Parent(s): 3f51080

Update handler.py

Browse files

Files changed (1) hide show

handler.py +93 -56

handler.py CHANGED Viewed

@@ -4,10 +4,14 @@ from pathlib import Path
 import time
 from datetime import datetime
 import argparse
 from hyvideo.utils.file_utils import save_videos_grid
 from hyvideo.inference import HunyuanVideoSampler
 from hyvideo.constants import NEGATIVE_PROMPT
 def get_default_args():
     """Create default arguments instead of parsing from command line"""
     parser = argparse.ArgumentParser()
@@ -95,38 +99,60 @@ def get_default_args():
 class EndpointHandler:
     def __init__(self, path: str = ""):
         """Initialize the handler with model path and default config."""
         # Use default args instead of parsing from command line
         self.args = get_default_args()
         # Set up model paths
         self.args.model_base = path
-        self.args.dit_weight = str(Path(path) / "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt")
         # Initialize model
         models_root_path = Path(path)
         if not models_root_path.exists():
-            raise ValueError(f"`models_root` not exists: {models_root_path}")
-        self.model = HunyuanVideoSampler.from_pretrained(models_root_path, args=self.args)
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Process a single request
-        Args:
-            data: Dictionary containing:
-                - inputs (str): The prompt text
-                - resolution (str, optional): Video resolution like "1280x720"
-                - video_length (int, optional): Number of frames
-                - num_inference_steps (int, optional): Number of inference steps
-                - seed (int, optional): Random seed (-1 for random)
-                - guidance_scale (float, optional): Guidance scale value
-                - flow_shift (float, optional): Flow shift value
-                - embedded_guidance_scale (float, optional): Embedded guidance scale
-        Returns:
-            Dictionary containing the generated video as base64 string
-        """
         # Get inputs from request data
         prompt = data.pop("inputs", None)
         if prompt is None:
@@ -145,41 +171,52 @@ class EndpointHandler:
         flow_shift = float(data.pop("flow_shift", 7.0))
         embedded_guidance_scale = float(data.pop("embedded_guidance_scale", 6.0))
-        # Run inference
-        outputs = self.model.predict(
-            prompt=prompt,
-            height=height,
-            width=width,
-            video_length=video_length,
-            seed=seed,
-            negative_prompt="",
-            infer_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            num_videos_per_prompt=1,
-            flow_shift=flow_shift,
-            batch_size=1,
-            embedded_guidance_scale=embedded_guidance_scale
-        )
-        # Get the video tensor
-        samples = outputs['samples']
-        sample = samples[0].unsqueeze(0)
-        # Save to temporary file
-        temp_path = "/tmp/temp_video.mp4"
-        save_videos_grid(sample, temp_path, fps=24)
-        # Read video file and convert to base64
-        with open(temp_path, "rb") as f:
-            video_bytes = f.read()
-        import base64
-        video_base64 = base64.b64encode(video_bytes).decode()
-        # Cleanup
-        os.remove(temp_path)
-        return {
-            "video_base64": video_base64,
-            "seed": outputs['seeds'][0],
-            "prompt": outputs['prompts'][0]
-        }

 import time
 from datetime import datetime
 import argparse
+from loguru import logger
 from hyvideo.utils.file_utils import save_videos_grid
 from hyvideo.inference import HunyuanVideoSampler
 from hyvideo.constants import NEGATIVE_PROMPT
+# Configure logger
+logger.add("handler_debug.log", rotation="500 MB")
 def get_default_args():
     """Create default arguments instead of parsing from command line"""
     parser = argparse.ArgumentParser()
 class EndpointHandler:
     def __init__(self, path: str = ""):
         """Initialize the handler with model path and default config."""
+        # Log the initial path
+        logger.info(f"Initializing EndpointHandler with path: {path}")
         # Use default args instead of parsing from command line
         self.args = get_default_args()
+        # Convert path to absolute path if not already
+        path = str(Path(path).absolute())
+        logger.info(f"Absolute path: {path}")
         # Set up model paths
         self.args.model_base = path
+        # Set paths for model components
+        dit_weight_path = Path(path) / "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt"
+        vae_path = Path(path) / "hunyuan-video-t2v-720p/vae"
+        # Log all critical paths
+        logger.info(f"Model base path: {self.args.model_base}")
+        logger.info(f"DiT weight path: {dit_weight_path}")
+        logger.info(f"VAE path: {vae_path}")
+        # Verify paths exist
+        logger.info("Checking if paths exist:")
+        logger.info(f"DiT weight exists: {dit_weight_path.exists()}")
+        logger.info(f"VAE path exists: {vae_path.exists()}")
+        if vae_path.exists():
+            logger.info(f"VAE path contents: {list(vae_path.glob('*'))}")
+        self.args.dit_weight = str(dit_weight_path)
         # Initialize model
         models_root_path = Path(path)
         if not models_root_path.exists():
+            raise ValueError(f"models_root_path does not exist: {models_root_path}")
+        # Log directory contents for debugging
+        logger.info("Directory contents:")
+        for item in models_root_path.glob("**/*"):
+            logger.info(f"  {item}")
+        try:
+            logger.info("Attempting to initialize HunyuanVideoSampler...")
+            self.model = HunyuanVideoSampler.from_pretrained(models_root_path, args=self.args)
+            logger.info("Successfully initialized HunyuanVideoSampler")
+        except Exception as e:
+            logger.error(f"Error initializing model: {str(e)}")
+            raise
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Process a single request"""
+        # Log incoming request
+        logger.info(f"Processing request with data: {data}")
         # Get inputs from request data
         prompt = data.pop("inputs", None)
         if prompt is None:
         flow_shift = float(data.pop("flow_shift", 7.0))
         embedded_guidance_scale = float(data.pop("embedded_guidance_scale", 6.0))
+        logger.info(f"Processing with parameters: width={width}, height={height}, "
+                   f"video_length={video_length}, seed={seed}, "
+                   f"num_inference_steps={num_inference_steps}")
+        try:
+            # Run inference
+            outputs = self.model.predict(
+                prompt=prompt,
+                height=height,
+                width=width,
+                video_length=video_length,
+                seed=seed,
+                negative_prompt="",
+                infer_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                num_videos_per_prompt=1,
+                flow_shift=flow_shift,
+                batch_size=1,
+                embedded_guidance_scale=embedded_guidance_scale
+            )
+            # Get the video tensor
+            samples = outputs['samples']
+            sample = samples[0].unsqueeze(0)
+            # Save to temporary file
+            temp_path = "/tmp/temp_video.mp4"
+            save_videos_grid(sample, temp_path, fps=24)
+            # Read video file and convert to base64
+            with open(temp_path, "rb") as f:
+                video_bytes = f.read()
+            import base64
+            video_base64 = base64.b64encode(video_bytes).decode()
+            # Cleanup
+            os.remove(temp_path)
+            logger.info("Successfully generated and encoded video")
+            return {
+                "video_base64": video_base64,
+                "seed": outputs['seeds'][0],
+                "prompt": outputs['prompts'][0]
+            }
+        except Exception as e:
+            logger.error(f"Error during video generation: {str(e)}")
+            raise