Commit 68fad5b by root · 1 Parent(s): ca00209

reverting back to moore animate

MusePose DELETED
@@ -1 +0,0 @@
-Subproject commit 124543e3ff347b508a2c489c4344f5f40190c5d3
 
 
__pycache__/handler.cpython-310.pyc CHANGED
Binary files a/__pycache__/handler.cpython-310.pyc and b/__pycache__/handler.cpython-310.pyc differ
 
handler.py CHANGED
@@ -4,19 +4,19 @@ from PIL import Image
 import base64
 from io import BytesIO
 import numpy as np
-# from diffusers import AutoencoderKL, DDIMScheduler
+from diffusers import AutoencoderKL, DDIMScheduler
 from einops import repeat
 from omegaconf import OmegaConf
-# from transformers import CLIPVisionModelWithProjection
+from transformers import CLIPVisionModelWithProjection
 import cv2
 import os
 import sys
 import skvideo.io
-# from src.models.pose_guider import PoseGuider
-# from src.models.unet_2d_condition import UNet2DConditionModel
-# from src.models.unet_3d import UNet3DConditionModel
-# from src.pipelines.pipeline_pose2vid_long import Pose2VideoPipeline
-# from src.utils.util import read_frames, get_fps, save_videos_grid
+from src.models.pose_guider import PoseGuider
+from src.models.unet_2d_condition import UNet2DConditionModel
+from src.models.unet_3d import UNet3DConditionModel
+from src.pipelines.pipeline_pose2vid_long import Pose2VideoPipeline
+from src.utils.util import read_frames, get_fps, save_videos_grid
 import roop.globals
 from roop.core import start, decode_execution_providers, suggest_max_memory, suggest_execution_threads
 from roop.utilities import normalize_output_path
@@ -29,6 +29,9 @@ import subprocess
 import requests
 import tempfile
 
+from rembg import remove
+import onnxruntime as ort
+
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -43,21 +46,11 @@ class EndpointHandler():
         if not os.path.exists(config_path):
             raise FileNotFoundError(f"The configuration file was not found at: {config_path}")
 
-        self.run_post_install()
         self.config = OmegaConf.load(config_path)
         self.weight_dtype = torch.float16
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.pipeline = None
-        # self._initialize_pipeline()
-
-    def run_post_install(self):
-        try:
-            result = subprocess.run(['bash', 'post_install.sh'], check=True, capture_output=True, text=True)
-            print("Post-install script ran successfully.")
-            print(result.stdout)
-        except subprocess.CalledProcessError as e:
-            print("Error running post-install script.")
-            print(e.stderr)
+        self._initialize_pipeline()
 
     def _initialize_pipeline(self):
         base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -141,14 +134,13 @@ class EndpointHandler():
 
         return cropped_face
 
-    def _swap_face(self, source_path, target_video_path):
+    def _swap_face(self, source_path, target_video_path, output_path):
         # source_path = "input.jpg"
         # source_image.save(source_path, format="JPEG", quality=95)
-        output_path = "output.mp4"
 
         roop.globals.source_path = source_path
         roop.globals.target_path = target_video_path
-        roop.globals.output_path = normalize_output_path(roop.globals.source_path, roop.globals.target_path, output_path)
+        roop.globals.output_path = output_path
         roop.globals.frame_processors = ["face_swapper", "face_enhancer"]
         roop.globals.headless = True
         roop.globals.keep_fps = True
@@ -286,6 +278,12 @@ class EndpointHandler():
         inputs = data.get("inputs", {})
         ref_image_url = inputs.get("ref_image_url", "")
        video_url = inputs.get("video_url", "")
+        width = inputs.get("width", 512)
+        height = inputs.get("height", 768)
+        length = inputs.get("length", 24)
+        num_inference_steps = inputs.get("num_inference_steps", 25)
+        cfg = inputs.get("cfg", 3.5)
+        seed = inputs.get("seed", 123)
 
         # Create a unique temporary directory for this request
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -302,21 +300,37 @@
             self.download_file(ref_image_url, downloaded_image_path)
             ref_image = Image.open(downloaded_image_path)
 
+            # Calculate new dimensions
+            original_width, original_height = ref_image.size
+            max_dimension = max(original_width, original_height)
+            if max_dimension > 600:
+                ratio = max_dimension / 600
+                width = int(original_width / ratio)
+                height = int(original_height / ratio)
+            else:
+                width = original_width
+                height = original_height
+
+            # Remove the background from the reference image
+            ref_image_no_bg = remove(ref_image)
+            ref_image_no_bg_path = os.path.join(video_root, "ref_image_no_bg.png")
+            ref_image_no_bg.save(ref_image_no_bg_path)
+
             pose_output_path = os.path.join(temp_dir, "pose_videos")
 
             # Run the extract_dwpose_from_vid.py script
             command = [
-                "python", "./MusePose/pose_align.py",
-                "--imgfn_refer", downloaded_image_path,
-                "--vidfn", downloaded_video_path,
-                "--output_dir", pose_output_path
+                "python", "extract_dwpose_from_vid.py",
+                "--video_root", video_root
             ]
             result = subprocess.run(command, capture_output=True, text=True)
             if result.returncode != 0:
                 raise RuntimeError(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
 
             # Locate the extracted pose video
-            pose_video_path = os.path.join(pose_output_path, "pose_video.mp4")
+            save_dir = video_root + "_dwpose"
+            print(f"Expected save directory: {save_dir}")  # Debug statement
+            pose_video_path = os.path.join(save_dir, "downloaded_video.mp4")
 
             if not os.path.exists(pose_video_path):
                 print(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
@@ -326,32 +340,36 @@ class EndpointHandler():
 
             # Speed up the pose video by 4x
             sped_up_pose_video_path = os.path.join(temp_dir, "sped_up_pose_video.mp4")
-            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=1)
-
-            dancing_video_dir = os.path.join(temp_dir, "dancing_video")
-            dancing_video_path_final = os.path.join(temp_dir, "dancing_video", "dance.mp4") # This is in create_video, can change there
-
-            command = [
-                "python", "./MusePose/create_video.py",
-                "--ref_image_path", downloaded_image_path,
-                "--pose_video_path", sped_up_pose_video_path,
-                "-W", "512",
-                "-H", "512",
-                "--output_dir", dancing_video_dir
-            ]
-            result = subprocess.run(command, capture_output=True, text=True)
-            if result.returncode != 0:
-                raise RuntimeError(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
-
-            # save_dir = os.path.join(temp_dir, "output")
-            # if not os.path.exists(save_dir):
-            #     os.makedirs(save_dir, exist_ok=True)
-            # animation_path = os.path.join(save_dir, "animation_output.mp4")
-            # save_videos_grid(video, animation_path, n_rows=1, fps=src_fps)
+            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=4)
+
+            torch.manual_seed(seed)
+            pose_images = read_frames(sped_up_pose_video_path)
+            src_fps = get_fps(sped_up_pose_video_path)
+
+            pose_list = []
+            total_length = min(length, len(pose_images))
+            for pose_image_pil in pose_images[:total_length]:
+                pose_list.append(pose_image_pil)
+
+            video = self.pipeline(
+                ref_image_no_bg,
+                pose_list,
+                width=width,
+                height=height,
+                video_length=total_length,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=cfg
+            ).videos
+
+            save_dir = os.path.join(temp_dir, "output")
+            if not os.path.exists(save_dir):
+                os.makedirs(save_dir, exist_ok=True)
+            animation_path = os.path.join(save_dir, "animation_output.mp4")
+            save_videos_grid(video, animation_path, n_rows=1, fps=src_fps)
 
             # Crop the face from the reference image and save it
-            cropped_face_path = os.path.join(temp_dir, "cropped_face.jpg")
-            cropped_face = self._crop_face(ref_image, save_path=cropped_face_path)
+            cropped_face_path = os.path.join(save_dir, "cropped_face.jpg")
+            cropped_face = self._crop_face(ref_image_no_bg, save_path=cropped_face_path)
 
             # Delete the pipeline and clear CUDA cache to free up memory
             del self.pipeline
@@ -359,22 +377,23 @@
 
             # Perform face swapping
             # self.print_directory_contents(temp_dir)
-            # swapped_face_video_path = self._swap_face(cropped_face_path, animation_path)
+            # swapped_face_video_path = os.path.join(save_dir, "swapped_face_output.mp4")
+            # self._swap_face(cropped_face_path, animation_path, swapped_face_video_path)
 
             # Slow down the produced video by 4x
             self.print_directory_contents(temp_dir)
-            slowed_down_animation_path = os.path.join(temp_dir, "slowed_down_animation_output.mp4")
-            self.slow_down_video(dancing_video_path_final, slowed_down_animation_path, factor=1)
+            slowed_down_animation_path = os.path.join(save_dir, "slowed_down_animation_output.mp4")
+            self.slow_down_video(animation_path, slowed_down_animation_path, factor=4)
 
             # Clear CUDA cache before RIFE interpolation
             torch.cuda.empty_cache()
 
             # Perform RIFE interpolation
-            # rife_output_path = os.path.join(temp_dir, "completed_result.mp4")
-            # self.run_rife_interpolation(slowed_down_animation_path, rife_output_path, multi=2, scale=0.5)
+            rife_output_path = os.path.join(save_dir, "completed_result.mp4")
+            self.run_rife_interpolation(slowed_down_animation_path, rife_output_path, multi=2, scale=0.5)
 
             # Encode the final video in base64
-            with open(slowed_down_animation_path, "rb") as video_file:
+            with open(rife_output_path, "rb") as video_file:
                 video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
 
             torch.cuda.empty_cache()
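
Note: the reverted __call__ path ends by base64-encoding the RIFE-interpolated video. A minimal client-side sketch for turning that string back into an .mp4 file — the "video" response key here is an assumption, since the handler's return statement falls outside these hunks:

    import base64

    def save_handler_video(response: dict, out_path: str = "result.mp4") -> None:
        # "video" is a hypothetical response key; the actual return
        # statement is not shown in this diff.
        video_b64 = response["video"]
        with open(out_path, "wb") as f:
            f.write(base64.b64decode(video_b64))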
input.jpg DELETED
Binary file (20.3 kB)
 
me.jpeg DELETED
Binary file (82.6 kB)
 
output/gradio/animation_output.mp4 DELETED
Binary file (103 kB)
 
output/gradio/cropped_face.jpg DELETED
Binary file (95.4 kB)
 
pose_video.mp4 DELETED
Binary file (755 kB)
 
pretrained_weights/DWPose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+size 134399116
pretrained_weights/DWPose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+size 216746733
pretrained_weights/denoising_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e5a2c34fac369e8a922972ca2210916c6af175a0dad907deccf6235816ad52
+size 3438374293
pretrained_weights/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+{
+  "_name_or_path": "/home/jpinkney/.cache/huggingface/diffusers/models--lambdalabs--sd-image-variations-diffusers/snapshots/ca6f97f838ae1b5bf764f31363a21f388f4d8f3e/image_encoder",
+  "architectures": [
+    "CLIPVisionModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 1024,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 14,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1"
+}
pretrained_weights/image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89d2aa29b5fdf64f3ad4f45fb4227ea98bc45156bbae673b85be1af7783dbabb
+size 1215993967
pretrained_weights/motion_module.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d11e01a281b39880da2efeea892215c1313e5713fca3d100a7fbb72ee312ef9
+size 1817900227
pretrained_weights/pose_guider.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a8b7c1b4db92980fd977b4fd003c1396bbae9a9cdea00c35d452136d5e4f488
+size 4351337
pretrained_weights/reference_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beddccb08d49a8b29b0f4d6d456c6521d4382a8d8d48884fa60ba8802509c214
+size 3438323817
pretrained_weights/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.4.2",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}
pretrained_weights/sd-vae-ft-mse/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+size 334707217
pretrained_weights/stable-diffusion-v1-5/unet/config.json ADDED
@@ -0,0 +1,36 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}
pretrained_weights/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7da0e21ba7ea50637bee26e81c220844defdf01aafca02b2c42ecdadb813de4
+size 3438354725
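
Note: the weights above are committed as Git LFS pointers (a version line, a sha256 oid, and a byte size) rather than as binaries. A small sketch for verifying a downloaded file against its pointer, using the dw-ll_ucoco_384.onnx values from this commit:

    import hashlib
    import os

    def verify_lfs_file(path: str, expected_oid: str, expected_size: int) -> bool:
        # Cheap size check first, then stream the file through SHA-256.
        if os.path.getsize(path) != expected_size:
            return False
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest() == expected_oid

    print(verify_lfs_file(
        "pretrained_weights/DWPose/dw-ll_ucoco_384.onnx",
        "724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843",
        134399116,
    ))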
requirements.txt CHANGED
@@ -57,3 +57,6 @@ sk-video==1.1.10
 moviepy==1.0.3
 
 requests==2.32.3
+
+
+rembg
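
Note: the new rembg dependency backs the background-removal step added to handler.py. A standalone sanity check of that call path, assuming a hypothetical local test image person.jpg:

    from PIL import Image
    from rembg import remove

    ref_image = Image.open("person.jpg")
    # remove() accepts and returns a PIL image; the output carries an alpha
    # channel, which is why handler.py saves the result as a PNG.
    ref_image_no_bg = remove(ref_image)
    ref_image_no_bg.save("person_no_bg.png")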
sampler.py CHANGED
@@ -10,10 +10,8 @@ handler = EndpointHandler()
 # Define sample inputs
 inputs = {
     "inputs": {
-        "ref_image_url": "https://media.discordapp.net/attachments/1183633414612594708/1245882096116043887/image.jpg?ex=665a5d9f&is=66590c1f&hm=3065fed7b8f5bd13aa2c8ad7d97e625dd4c2977589dbe7d8c13d024b782ab25a&=&format=webp&width=672&height=1194",
+        "ref_image_url": "https://media.discordapp.net/attachments/1237667074210267217/1246013998042976276/image.jpg?ex=665ad876&is=665986f6&hm=e7f0e6fd51c1068c15f1a750ca97abb4b2a4bfed396160ff44cf1abecb489d11&=&format=webp&width=896&height=1194",
         "video_url": "https://cdn.discordapp.com/attachments/1237667074210267217/1245971599660679208/pose.mov?ex=665ab0fa&is=66595f7a&hm=63691e23a23ebd8657a10ec708d63a06046a124c3940aa133de22a94aa1fd6c5&",
-        "width": 378,
-        "height": 504,
         "length": 24,
         "num_inference_steps": 25,
         "cfg": 3.5,