Spaces:

Maximofn
/

HunyuanVideo

Runtime error

App Files Files Community

Maximofn committed on Jan 25, 2025

Commit

67c95e3

1 Parent(s): f179ff2

feat(src): :rocket: Update code with diffusers info

Browse files

Files changed (2) hide show

app.py +40 -41
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,31 +1,37 @@
 import os
 import time
-from pathlib import Path
 from datetime import datetime
 import gradio as gr
-import random
-import os
-from hyvideo.utils.file_utils import save_videos_grid
 from hyvideo.config import parse_args
-from hyvideo.inference import HunyuanVideoSampler
-from hyvideo.constants import NEGATIVE_PROMPT
-def initialize_model(model_path):
-    args = parse_args()
-    # models_root_path = Path(model_path)
-    # if not models_root_path.exists():
-    #     raise ValueError(f"`models_root` not exists: {models_root_path}")
-    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(
-        model_path,
-        args=args,
-        device_map="auto"
     )
-    return hunyuan_video_sampler
 def generate_video(
-    model,
     prompt,
     resolution,
     video_length,
@@ -38,38 +44,32 @@ def generate_video(
     seed = None if seed == -1 else seed
     width, height = resolution.split("x")
     width, height = int(width), int(height)
-    negative_prompt = "" # not applicable in the inference
-    outputs = model.predict(
         prompt=prompt,
         height=height,
-        width=width,
-        video_length=video_length,
-        seed=seed,
-        negative_prompt=negative_prompt,
-        infer_steps=num_inference_steps,
         guidance_scale=guidance_scale,
-        num_videos_per_prompt=1,
-        flow_shift=flow_shift,
-        batch_size=1,
-        embedded_guidance_scale=embedded_guidance_scale
-    )
-    samples = outputs['samples']
-    sample = samples[0].unsqueeze(0)
     save_path = os.path.join(os.getcwd(), "gradio_outputs")
     os.makedirs(save_path, exist_ok=True)
     time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
-    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
-    save_videos_grid(sample, video_path, fps=24)
     print(f'Sample saved to: {video_path}')
     return video_path
-def create_demo(model_path, save_path):
-    model = initialize_model(model_path)
     with gr.Blocks() as demo:
         gr.Markdown("# Hunyuan Video Generation")
@@ -119,7 +119,7 @@ def create_demo(model_path, save_path):
                 output = gr.Video(label="Generated Video")
         generate_btn.click(
-            fn=lambda *inputs: generate_video(model, *inputs),
             inputs=[
                 prompt,
                 resolution,
@@ -141,7 +141,6 @@ if __name__ == "__main__":
     server_name = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port = int(os.getenv("SERVER_PORT", "8081"))
     args = parse_args()
-    print(args)
-    model = "tencent/HunyuanVideo"
     demo = create_demo(model, args.save_path)
     demo.launch(server_name=server_name, server_port=server_port)

+import torch
+from diffusers import BitsAndBytesConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
 import os
 import time
 from datetime import datetime
 import gradio as gr
 from hyvideo.config import parse_args
+def initialize_model(model):
+    quant_config = BitsAndBytesConfig(load_in_8bit=True)
+    transformer_8bit = HunyuanVideoTransformer3DModel.from_pretrained(
+        model,
+        subfolder="transformer",
+        quantization_config=quant_config,
+        torch_dtype=torch.bfloat16,
+        device_map="balanced",
+    )
+    # Load the pipeline
+    pipeline = HunyuanVideoPipeline.from_pretrained(
+        model,
+        transformer=transformer_8bit,
+        torch_dtype=torch.float16,
+        device_map="balanced",
     )
+    return pipeline
 def generate_video(
+    pipeline,
     prompt,
     resolution,
     video_length,
     seed = None if seed == -1 else seed
     width, height = resolution.split("x")
     width, height = int(width), int(height)
+    # Generate the video using the pipeline
+    video = pipeline(
         prompt=prompt,
         height=height,
+        width=width,
+        num_frames=video_length,
+        num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
+    ).frames[0]
+    # Save the video
     save_path = os.path.join(os.getcwd(), "gradio_outputs")
     os.makedirs(save_path, exist_ok=True)
     time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
+    video_path = f"{save_path}/{time_flag}_seed{seed}_{prompt[:100].replace('/','')}.mp4"
+    from diffusers.utils import export_to_video
+    export_to_video(video, video_path, fps=24)
     print(f'Sample saved to: {video_path}')
     return video_path
+def create_demo(model, save_path):
+    pipeline = initialize_model(model)
     with gr.Blocks() as demo:
         gr.Markdown("# Hunyuan Video Generation")
                 output = gr.Video(label="Generated Video")
         generate_btn.click(
+            fn=lambda *inputs: generate_video(pipeline, *inputs),
             inputs=[
                 prompt,
                 resolution,
     server_name = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port = int(os.getenv("SERVER_PORT", "8081"))
     args = parse_args()
+    model = "hunyuanvideo-community/HunyuanVideo"  # Updated model path
     demo = create_demo(model, args.save_path)
     demo.launch(server_name=server_name, server_port=server_port)

requirements.txt CHANGED Viewed

@@ -2,7 +2,8 @@ torch==2.4.0
 torchvision==0.19.0
 torchaudio==2.4.0
 opencv-python==4.9.0.80
-diffusers==0.31.0
 transformers==4.46.3
 tokenizers==0.20.3
 accelerate==1.1.1
@@ -15,6 +16,7 @@ imageio==2.34.0
 imageio-ffmpeg==0.5.1
 safetensors==0.4.3
 gradio==5.0.0
 # ninja
 # git+https://github.com/Dao-AILab/flash-attention.git@v2.6.3
 # xfuser==0.4.0

 torchvision==0.19.0
 torchaudio==2.4.0
 opencv-python==4.9.0.80
+# diffusers==0.31.0
+git+https://github.com/huggingface/diffusers
 transformers==4.46.3
 tokenizers==0.20.3
 accelerate==1.1.1
 imageio-ffmpeg==0.5.1
 safetensors==0.4.3
 gradio==5.0.0
+bitsandbytes
 # ninja
 # git+https://github.com/Dao-AILab/flash-attention.git@v2.6.3
 # xfuser==0.4.0