Commit 68fad5b (parent: ca00209)
committed by root

reverting back to moore animate
Changed files:
- MusePose +0 -1
- __pycache__/handler.cpython-310.pyc +0 -0
- handler.py +75 -56
- input.jpg +0 -0
- me.jpeg +0 -0
- output/gradio/animation_output.mp4 +0 -0
- output/gradio/cropped_face.jpg +0 -0
- pose_video.mp4 +0 -0
- pretrained_weights/DWPose/dw-ll_ucoco_384.onnx +3 -0
- pretrained_weights/DWPose/yolox_l.onnx +3 -0
- pretrained_weights/denoising_unet.pth +3 -0
- pretrained_weights/image_encoder/config.json +23 -0
- pretrained_weights/image_encoder/pytorch_model.bin +3 -0
- pretrained_weights/motion_module.pth +3 -0
- pretrained_weights/pose_guider.pth +3 -0
- pretrained_weights/reference_unet.pth +3 -0
- pretrained_weights/sd-vae-ft-mse/config.json +29 -0
- pretrained_weights/sd-vae-ft-mse/diffusion_pytorch_model.bin +3 -0
- pretrained_weights/stable-diffusion-v1-5/unet/config.json +36 -0
- pretrained_weights/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin +3 -0
- requirements.txt +3 -0
- sampler.py +1 -3
MusePose DELETED
@@ -1 +0,0 @@
-Subproject commit 124543e3ff347b508a2c489c4344f5f40190c5d3
__pycache__/handler.cpython-310.pyc CHANGED
Binary files a/__pycache__/handler.cpython-310.pyc and b/__pycache__/handler.cpython-310.pyc differ
handler.py CHANGED
@@ -4,19 +4,19 @@ from PIL import Image
 import base64
 from io import BytesIO
 import numpy as np
-
+from diffusers import AutoencoderKL, DDIMScheduler
 from einops import repeat
 from omegaconf import OmegaConf
-
+from transformers import CLIPVisionModelWithProjection
 import cv2
 import os
 import sys
 import skvideo.io
-
-
-
-
-
+from src.models.pose_guider import PoseGuider
+from src.models.unet_2d_condition import UNet2DConditionModel
+from src.models.unet_3d import UNet3DConditionModel
+from src.pipelines.pipeline_pose2vid_long import Pose2VideoPipeline
+from src.utils.util import read_frames, get_fps, save_videos_grid
 import roop.globals
 from roop.core import start, decode_execution_providers, suggest_max_memory, suggest_execution_threads
 from roop.utilities import normalize_output_path
@@ -29,6 +29,9 @@ import subprocess
 import requests
 import tempfile
 
+from rembg import remove
+import onnxruntime as ort
+
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -43,21 +46,11 @@ class EndpointHandler():
         if not os.path.exists(config_path):
             raise FileNotFoundError(f"The configuration file was not found at: {config_path}")
 
-        self.run_post_install()
         self.config = OmegaConf.load(config_path)
         self.weight_dtype = torch.float16
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.pipeline = None
-
-
-    def run_post_install(self):
-        try:
-            result = subprocess.run(['bash', 'post_install.sh'], check=True, capture_output=True, text=True)
-            print("Post-install script ran successfully.")
-            print(result.stdout)
-        except subprocess.CalledProcessError as e:
-            print("Error running post-install script.")
-            print(e.stderr)
+        self._initialize_pipeline()
 
     def _initialize_pipeline(self):
         base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -141,14 +134,13 @@ class EndpointHandler():
 
         return cropped_face
 
-    def _swap_face(self, source_path, target_video_path):
+    def _swap_face(self, source_path, target_video_path, output_path):
         # source_path = "input.jpg"
         # source_image.save(source_path, format="JPEG", quality=95)
-        output_path = "output.mp4"
 
         roop.globals.source_path = source_path
         roop.globals.target_path = target_video_path
-        roop.globals.output_path =
+        roop.globals.output_path = output_path
         roop.globals.frame_processors = ["face_swapper", "face_enhancer"]
         roop.globals.headless = True
         roop.globals.keep_fps = True
@@ -286,6 +278,12 @@ class EndpointHandler():
         inputs = data.get("inputs", {})
         ref_image_url = inputs.get("ref_image_url", "")
         video_url = inputs.get("video_url", "")
+        width = inputs.get("width", 512)
+        height = inputs.get("height", 768)
+        length = inputs.get("length", 24)
+        num_inference_steps = inputs.get("num_inference_steps", 25)
+        cfg = inputs.get("cfg", 3.5)
+        seed = inputs.get("seed", 123)
 
         # Create a unique temporary directory for this request
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -302,21 +300,37 @@
             self.download_file(ref_image_url, downloaded_image_path)
             ref_image = Image.open(downloaded_image_path)
 
+            # Calculate new dimensions
+            original_width, original_height = ref_image.size
+            max_dimension = max(original_width, original_height)
+            if max_dimension > 600:
+                ratio = max_dimension / 600
+                width = int(original_width / ratio)
+                height = int(original_height / ratio)
+            else:
+                width = original_width
+                height = original_height
+
+            # Remove the background from the reference image
+            ref_image_no_bg = remove(ref_image)
+            ref_image_no_bg_path = os.path.join(video_root, "ref_image_no_bg.png")
+            ref_image_no_bg.save(ref_image_no_bg_path)
+
             pose_output_path = os.path.join(temp_dir, "pose_videos")
 
             # Run the extract_dwpose_from_vid.py script
             command = [
-                "python", "
-                "--
-                "--vidfn", downloaded_video_path,
-                "--output_dir", pose_output_path
+                "python", "extract_dwpose_from_vid.py",
+                "--video_root", video_root
             ]
             result = subprocess.run(command, capture_output=True, text=True)
             if result.returncode != 0:
                 raise RuntimeError(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
 
             # Locate the extracted pose video
-
+            save_dir = video_root + "_dwpose"
+            print(f"Expected save directory: {save_dir}")  # Debug statement
+            pose_video_path = os.path.join(save_dir, "downloaded_video.mp4")
 
             if not os.path.exists(pose_video_path):
                 print(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
@@ -326,32 +340,36 @@
 
             # Speed up the pose video by 4x
             sped_up_pose_video_path = os.path.join(temp_dir, "sped_up_pose_video.mp4")
-            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=
-            (21 further deleted lines are not captured in this view)
+            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=4)
+
+            torch.manual_seed(seed)
+            pose_images = read_frames(sped_up_pose_video_path)
+            src_fps = get_fps(sped_up_pose_video_path)
+
+            pose_list = []
+            total_length = min(length, len(pose_images))
+            for pose_image_pil in pose_images[:total_length]:
+                pose_list.append(pose_image_pil)
+
+            video = self.pipeline(
+                ref_image_no_bg,
+                pose_list,
+                width=width,
+                height=height,
+                video_length=total_length,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=cfg
+            ).videos
+
+            save_dir = os.path.join(temp_dir, "output")
+            if not os.path.exists(save_dir):
+                os.makedirs(save_dir, exist_ok=True)
+            animation_path = os.path.join(save_dir, "animation_output.mp4")
+            save_videos_grid(video, animation_path, n_rows=1, fps=src_fps)
 
             # Crop the face from the reference image and save it
-            cropped_face_path = os.path.join(
-            cropped_face = self._crop_face(
+            cropped_face_path = os.path.join(save_dir, "cropped_face.jpg")
+            cropped_face = self._crop_face(ref_image_no_bg, save_path=cropped_face_path)
 
             # Delete the pipeline and clear CUDA cache to free up memory
             del self.pipeline
@@ -359,22 +377,23 @@
 
             # Perform face swapping
             # self.print_directory_contents(temp_dir)
-            # swapped_face_video_path =
+            # swapped_face_video_path = os.path.join(save_dir, "swapped_face_output.mp4")
+            # self._swap_face(cropped_face_path, animation_path, swapped_face_video_path)
 
             # Slow down the produced video by 4x
             self.print_directory_contents(temp_dir)
-            slowed_down_animation_path = os.path.join(
-            self.slow_down_video(
+            slowed_down_animation_path = os.path.join(save_dir, "slowed_down_animation_output.mp4")
+            self.slow_down_video(animation_path, slowed_down_animation_path, factor=4)
 
             # Clear CUDA cache before RIFE interpolation
             torch.cuda.empty_cache()
 
             # Perform RIFE interpolation
-            (2 further deleted lines are not captured in this view)
+            rife_output_path = os.path.join(save_dir, "completed_result.mp4")
+            self.run_rife_interpolation(slowed_down_animation_path, rife_output_path, multi=2, scale=0.5)
 
             # Encode the final video in base64
-            with open(
+            with open(rife_output_path, "rb") as video_file:
                 video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
 
             torch.cuda.empty_cache()
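Note: the speed_up_video, slow_down_video, and run_rife_interpolation helpers that the new __call__ path depends on sit outside the visible hunks. A minimal sketch of how the two speed helpers could be written with ffmpeg's setpts filter follows; it assumes ffmpeg is on PATH, and it is an illustration, not the committed implementation.

# Assumed sketch, not the committed code: both helpers shell out to ffmpeg
# and rescale video presentation timestamps with the setpts filter.
import subprocess

def speed_up_video(input_path, output_path, factor=4):
    # setpts=PTS/factor plays the video `factor` times faster.
    subprocess.run(["ffmpeg", "-y", "-i", input_path,
                    "-filter:v", f"setpts=PTS/{factor}", output_path],
                   check=True, capture_output=True, text=True)

def slow_down_video(input_path, output_path, factor=4):
    # setpts=factor*PTS plays the video `factor` times slower.
    subprocess.run(["ffmpeg", "-y", "-i", input_path,
                    "-filter:v", f"setpts={factor}*PTS", output_path],
                   check=True, capture_output=True, text=True)

The 4x speed-up before generation and 4x slow-down afterwards presumably cut the number of frames the diffusion pipeline must denoise to a quarter of the source, with the RIFE pass (multi=2) synthesizing intermediate frames to smooth the result.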
input.jpg DELETED
Binary file (20.3 kB)

me.jpeg DELETED
Binary file (82.6 kB)

output/gradio/animation_output.mp4 DELETED
Binary file (103 kB)

output/gradio/cropped_face.jpg DELETED
Binary file (95.4 kB)

pose_video.mp4 DELETED
Binary file (755 kB)
pretrained_weights/DWPose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+size 134399116

pretrained_weights/DWPose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+size 216746733

pretrained_weights/denoising_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e5a2c34fac369e8a922972ca2210916c6af175a0dad907deccf6235816ad52
+size 3438374293

pretrained_weights/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+{
+  "_name_or_path": "/home/jpinkney/.cache/huggingface/diffusers/models--lambdalabs--sd-image-variations-diffusers/snapshots/ca6f97f838ae1b5bf764f31363a21f388f4d8f3e/image_encoder",
+  "architectures": [
+    "CLIPVisionModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 1024,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 14,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1"
+}

pretrained_weights/image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89d2aa29b5fdf64f3ad4f45fb4227ea98bc45156bbae673b85be1af7783dbabb
+size 1215993967

pretrained_weights/motion_module.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d11e01a281b39880da2efeea892215c1313e5713fca3d100a7fbb72ee312ef9
+size 1817900227

pretrained_weights/pose_guider.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a8b7c1b4db92980fd977b4fd003c1396bbae9a9cdea00c35d452136d5e4f488
+size 4351337

pretrained_weights/reference_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beddccb08d49a8b29b0f4d6d456c6521d4382a8d8d48884fa60ba8802509c214
+size 3438323817

pretrained_weights/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.4.2",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

pretrained_weights/sd-vae-ft-mse/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+size 334707217

pretrained_weights/stable-diffusion-v1-5/unet/config.json ADDED
@@ -0,0 +1,36 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

pretrained_weights/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7da0e21ba7ea50637bee26e81c220844defdf01aafca02b2c42ecdadb813de4
+size 3438354725
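Note: the weight files above are Git LFS pointers; the repository stores only the spec version, the sha256 object id, and the byte size, while the blobs themselves live in LFS storage. A hedged sketch for checking a locally materialized blob against its pointer (the digest and size are taken from the pose_guider.pth pointer above; the helper itself is illustrative):

# Illustrative helper: compare a downloaded file against the oid/size lines
# of its LFS pointer.
import hashlib
import os

def verify_lfs_object(path, expected_sha256, expected_size):
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading multi-GB weights into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha256

print(verify_lfs_object(
    "pretrained_weights/pose_guider.pth",
    "1a8b7c1b4db92980fd977b4fd003c1396bbae9a9cdea00c35d452136d5e4f488",
    4351337,
))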
requirements.txt CHANGED
@@ -57,3 +57,6 @@ sk-video==1.1.10
 moviepy==1.0.3
 
 requests==2.32.3
+
+
+rembg
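Note: rembg is added unpinned; it provides the remove() call the new handler.py code uses for background removal. A minimal usage sketch (the input path is hypothetical; rembg accepts a PIL image and returns an RGBA PIL image):

# Minimal sketch of the rembg call the handler relies on.
from PIL import Image
from rembg import remove

ref_image = Image.open("ref_image.jpg")      # hypothetical input path
ref_image_no_bg = remove(ref_image)          # RGBA image, background transparent
ref_image_no_bg.save("ref_image_no_bg.png")  # PNG preserves the alpha channel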
sampler.py CHANGED
@@ -10,10 +10,8 @@ handler = EndpointHandler()
 # Define sample inputs
 inputs = {
     "inputs": {
-        "ref_image_url": "https://media.discordapp.net/attachments/
+        "ref_image_url": "https://media.discordapp.net/attachments/1237667074210267217/1246013998042976276/image.jpg?ex=665ad876&is=665986f6&hm=e7f0e6fd51c1068c15f1a750ca97abb4b2a4bfed396160ff44cf1abecb489d11&=&format=webp&width=896&height=1194",
         "video_url": "https://cdn.discordapp.com/attachments/1237667074210267217/1245971599660679208/pose.mov?ex=665ab0fa&is=66595f7a&hm=63691e23a23ebd8657a10ec708d63a06046a124c3940aa133de22a94aa1fd6c5&",
-        "width": 378,
-        "height": 504,
         "length": 24,
         "num_inference_steps": 25,
         "cfg": 3.5,
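For reference, a sketch of driving the handler end to end and decoding the base64 video it produces. The handler's return statement is outside the visible hunks, so the response key "video" is an assumption; EndpointHandler is treated as callable with the inputs dict, as sampler.py suggests.

# Illustrative usage under the assumptions above.
import base64

result = handler(inputs)      # inputs as defined in sampler.py
video_b64 = result["video"]   # assumed key name for the base64 string
with open("completed_result.mp4", "wb") as f:
    f.write(base64.b64decode(video_b64))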