Commit 68fad5b (parent: ca00209)
committed by root

reverting back to moore animate
Changed files:
- MusePose +0 -1
- __pycache__/handler.cpython-310.pyc +0 -0
- handler.py +75 -56
- input.jpg +0 -0
- me.jpeg +0 -0
- output/gradio/animation_output.mp4 +0 -0
- output/gradio/cropped_face.jpg +0 -0
- pose_video.mp4 +0 -0
- pretrained_weights/DWPose/dw-ll_ucoco_384.onnx +3 -0
- pretrained_weights/DWPose/yolox_l.onnx +3 -0
- pretrained_weights/denoising_unet.pth +3 -0
- pretrained_weights/image_encoder/config.json +23 -0
- pretrained_weights/image_encoder/pytorch_model.bin +3 -0
- pretrained_weights/motion_module.pth +3 -0
- pretrained_weights/pose_guider.pth +3 -0
- pretrained_weights/reference_unet.pth +3 -0
- pretrained_weights/sd-vae-ft-mse/config.json +29 -0
- pretrained_weights/sd-vae-ft-mse/diffusion_pytorch_model.bin +3 -0
- pretrained_weights/stable-diffusion-v1-5/unet/config.json +36 -0
- pretrained_weights/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin +3 -0
- requirements.txt +3 -0
- sampler.py +1 -3
MusePose DELETED
@@ -1 +0,0 @@
-Subproject commit 124543e3ff347b508a2c489c4344f5f40190c5d3
__pycache__/handler.cpython-310.pyc CHANGED
Binary files a/__pycache__/handler.cpython-310.pyc and b/__pycache__/handler.cpython-310.pyc differ
handler.py CHANGED
@@ -4,19 +4,19 @@ from PIL import Image
 import base64
 from io import BytesIO
 import numpy as np
-
+from diffusers import AutoencoderKL, DDIMScheduler
 from einops import repeat
 from omegaconf import OmegaConf
-
+from transformers import CLIPVisionModelWithProjection
 import cv2
 import os
 import sys
 import skvideo.io
-
-
-
-
-
+from src.models.pose_guider import PoseGuider
+from src.models.unet_2d_condition import UNet2DConditionModel
+from src.models.unet_3d import UNet3DConditionModel
+from src.pipelines.pipeline_pose2vid_long import Pose2VideoPipeline
+from src.utils.util import read_frames, get_fps, save_videos_grid
 import roop.globals
 from roop.core import start, decode_execution_providers, suggest_max_memory, suggest_execution_threads
 from roop.utilities import normalize_output_path
@@ -29,6 +29,9 @@ import subprocess
 import requests
 import tempfile
 
+from rembg import remove
+import onnxruntime as ort
+
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -43,21 +46,11 @@ class EndpointHandler():
         if not os.path.exists(config_path):
             raise FileNotFoundError(f"The configuration file was not found at: {config_path}")
 
-        self.run_post_install()
         self.config = OmegaConf.load(config_path)
         self.weight_dtype = torch.float16
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.pipeline = None
-
-
-    def run_post_install(self):
-        try:
-            result = subprocess.run(['bash', 'post_install.sh'], check=True, capture_output=True, text=True)
-            print("Post-install script ran successfully.")
-            print(result.stdout)
-        except subprocess.CalledProcessError as e:
-            print("Error running post-install script.")
-            print(e.stderr)
+        self._initialize_pipeline()
 
     def _initialize_pipeline(self):
         base_dir = os.path.dirname(os.path.abspath(__file__))
@@ -141,14 +134,13 @@ class EndpointHandler():
 
         return cropped_face
 
-    def _swap_face(self, source_path, target_video_path):
+    def _swap_face(self, source_path, target_video_path, output_path):
         # source_path = "input.jpg"
         # source_image.save(source_path, format="JPEG", quality=95)
-        output_path = "output.mp4"
 
         roop.globals.source_path = source_path
         roop.globals.target_path = target_video_path
-        roop.globals.output_path =
+        roop.globals.output_path = output_path
         roop.globals.frame_processors = ["face_swapper", "face_enhancer"]
         roop.globals.headless = True
         roop.globals.keep_fps = True
@@ -286,6 +278,12 @@ class EndpointHandler():
         inputs = data.get("inputs", {})
         ref_image_url = inputs.get("ref_image_url", "")
         video_url = inputs.get("video_url", "")
+        width = inputs.get("width", 512)
+        height = inputs.get("height", 768)
+        length = inputs.get("length", 24)
+        num_inference_steps = inputs.get("num_inference_steps", 25)
+        cfg = inputs.get("cfg", 3.5)
+        seed = inputs.get("seed", 123)
 
         # Create a unique temporary directory for this request
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -302,21 +300,37 @@
             self.download_file(ref_image_url, downloaded_image_path)
             ref_image = Image.open(downloaded_image_path)
 
+            # Calculate new dimensions
+            original_width, original_height = ref_image.size
+            max_dimension = max(original_width, original_height)
+            if max_dimension > 600:
+                ratio = max_dimension / 600
+                width = int(original_width / ratio)
+                height = int(original_height / ratio)
+            else:
+                width = original_width
+                height = original_height
+
+            # Remove the background from the reference image
+            ref_image_no_bg = remove(ref_image)
+            ref_image_no_bg_path = os.path.join(video_root, "ref_image_no_bg.png")
+            ref_image_no_bg.save(ref_image_no_bg_path)
+
             pose_output_path = os.path.join(temp_dir, "pose_videos")
 
             # Run the extract_dwpose_from_vid.py script
             command = [
-                "python", "
-                "--
-                "--vidfn", downloaded_video_path,
-                "--output_dir", pose_output_path
+                "python", "extract_dwpose_from_vid.py",
+                "--video_root", video_root
             ]
             result = subprocess.run(command, capture_output=True, text=True)
             if result.returncode != 0:
                 raise RuntimeError(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
 
             # Locate the extracted pose video
-
+            save_dir = video_root + "_dwpose"
+            print(f"Expected save directory: {save_dir}")  # Debug statement
+            pose_video_path = os.path.join(save_dir, "downloaded_video.mp4")
 
             if not os.path.exists(pose_video_path):
                 print(f"Error running extract_dwpose_from_vid.py: {result.stderr}")
@@ -326,32 +340,36 @@
 
             # Speed up the pose video by 4x
             sped_up_pose_video_path = os.path.join(temp_dir, "sped_up_pose_video.mp4")
-            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=
-            (21 further deleted lines are not captured in this view)
+            self.speed_up_video(pose_video_path, sped_up_pose_video_path, factor=4)
+
+            torch.manual_seed(seed)
+            pose_images = read_frames(sped_up_pose_video_path)
+            src_fps = get_fps(sped_up_pose_video_path)
+
+            pose_list = []
+            total_length = min(length, len(pose_images))
+            for pose_image_pil in pose_images[:total_length]:
+                pose_list.append(pose_image_pil)
+
+            video = self.pipeline(
+                ref_image_no_bg,
+                pose_list,
+                width=width,
+                height=height,
+                video_length=total_length,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=cfg
+            ).videos
+
+            save_dir = os.path.join(temp_dir, "output")
+            if not os.path.exists(save_dir):
+                os.makedirs(save_dir, exist_ok=True)
+            animation_path = os.path.join(save_dir, "animation_output.mp4")
+            save_videos_grid(video, animation_path, n_rows=1, fps=src_fps)
 
             # Crop the face from the reference image and save it
-            cropped_face_path = os.path.join(
-            cropped_face = self._crop_face(
+            cropped_face_path = os.path.join(save_dir, "cropped_face.jpg")
+            cropped_face = self._crop_face(ref_image_no_bg, save_path=cropped_face_path)
 
             # Delete the pipeline and clear CUDA cache to free up memory
             del self.pipeline
@@ -359,22 +377,23 @@
 
             # Perform face swapping
             # self.print_directory_contents(temp_dir)
-            # swapped_face_video_path =
+            # swapped_face_video_path = os.path.join(save_dir, "swapped_face_output.mp4")
+            # self._swap_face(cropped_face_path, animation_path, swapped_face_video_path)
 
             # Slow down the produced video by 4x
             self.print_directory_contents(temp_dir)
-            slowed_down_animation_path = os.path.join(
-            self.slow_down_video(
+            slowed_down_animation_path = os.path.join(save_dir, "slowed_down_animation_output.mp4")
+            self.slow_down_video(animation_path, slowed_down_animation_path, factor=4)
 
             # Clear CUDA cache before RIFE interpolation
             torch.cuda.empty_cache()
 
             # Perform RIFE interpolation
-            (2 further deleted lines are not captured in this view)
+            rife_output_path = os.path.join(save_dir, "completed_result.mp4")
+            self.run_rife_interpolation(slowed_down_animation_path, rife_output_path, multi=2, scale=0.5)
 
             # Encode the final video in base64
-            with open(
+            with open(rife_output_path, "rb") as video_file:
                 video_base64 = base64.b64encode(video_file.read()).decode("utf-8")
 
             torch.cuda.empty_cache()
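Note: the speed_up_video, slow_down_video, and run_rife_interpolation helpers that the new __call__ path depends on sit outside the visible hunks. A minimal sketch of how the two speed helpers could be written with ffmpeg's setpts filter follows; it assumes ffmpeg is on PATH, and it is an illustration, not the committed implementation.

# Assumed sketch, not the committed code: both helpers shell out to ffmpeg
# and rescale video presentation timestamps with the setpts filter.
import subprocess

def speed_up_video(input_path, output_path, factor=4):
    # setpts=PTS/factor plays the video `factor` times faster.
    subprocess.run(["ffmpeg", "-y", "-i", input_path,
                    "-filter:v", f"setpts=PTS/{factor}", output_path],
                   check=True, capture_output=True, text=True)

def slow_down_video(input_path, output_path, factor=4):
    # setpts=factor*PTS plays the video `factor` times slower.
    subprocess.run(["ffmpeg", "-y", "-i", input_path,
                    "-filter:v", f"setpts={factor}*PTS", output_path],
                   check=True, capture_output=True, text=True)

The 4x speed-up before generation and 4x slow-down afterwards presumably cut the number of frames the diffusion pipeline must denoise to a quarter of the source, with the RIFE pass (multi=2) synthesizing intermediate frames to smooth the result.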
input.jpg DELETED
Binary file (20.3 kB)

me.jpeg DELETED
Binary file (82.6 kB)

output/gradio/animation_output.mp4 DELETED
Binary file (103 kB)

output/gradio/cropped_face.jpg DELETED
Binary file (95.4 kB)

pose_video.mp4 DELETED
Binary file (755 kB)
pretrained_weights/DWPose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+size 134399116

pretrained_weights/DWPose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+size 216746733

pretrained_weights/denoising_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e5a2c34fac369e8a922972ca2210916c6af175a0dad907deccf6235816ad52
+size 3438374293

pretrained_weights/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+{
+  "_name_or_path": "/home/jpinkney/.cache/huggingface/diffusers/models--lambdalabs--sd-image-variations-diffusers/snapshots/ca6f97f838ae1b5bf764f31363a21f388f4d8f3e/image_encoder",
+  "architectures": [
+    "CLIPVisionModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 1024,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 14,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1"
+}

pretrained_weights/image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89d2aa29b5fdf64f3ad4f45fb4227ea98bc45156bbae673b85be1af7783dbabb
+size 1215993967

pretrained_weights/motion_module.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d11e01a281b39880da2efeea892215c1313e5713fca3d100a7fbb72ee312ef9
+size 1817900227

pretrained_weights/pose_guider.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a8b7c1b4db92980fd977b4fd003c1396bbae9a9cdea00c35d452136d5e4f488
+size 4351337

pretrained_weights/reference_unet.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beddccb08d49a8b29b0f4d6d456c6521d4382a8d8d48884fa60ba8802509c214
+size 3438323817

pretrained_weights/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.4.2",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

pretrained_weights/sd-vae-ft-mse/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+size 334707217

pretrained_weights/stable-diffusion-v1-5/unet/config.json ADDED
@@ -0,0 +1,36 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

pretrained_weights/stable-diffusion-v1-5/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7da0e21ba7ea50637bee26e81c220844defdf01aafca02b2c42ecdadb813de4
+size 3438354725
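Note: the weight files above are Git LFS pointers; the repository stores only the spec version, the sha256 object id, and the byte size, while the blobs themselves live in LFS storage. A hedged sketch for checking a locally materialized blob against its pointer (the digest and size are taken from the pose_guider.pth pointer above; the helper itself is illustrative):

# Illustrative helper: compare a downloaded file against the oid/size lines
# of its LFS pointer.
import hashlib
import os

def verify_lfs_object(path, expected_sha256, expected_size):
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading multi-GB weights into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha256

print(verify_lfs_object(
    "pretrained_weights/pose_guider.pth",
    "1a8b7c1b4db92980fd977b4fd003c1396bbae9a9cdea00c35d452136d5e4f488",
    4351337,
))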
requirements.txt CHANGED
@@ -57,3 +57,6 @@ sk-video==1.1.10
 moviepy==1.0.3
 
 requests==2.32.3
+
+
+rembg
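Note: rembg is added unpinned; it provides the remove() call the new handler.py code uses for background removal. A minimal usage sketch (the input path is hypothetical; rembg accepts a PIL image and returns an RGBA PIL image):

# Minimal sketch of the rembg call the handler relies on.
from PIL import Image
from rembg import remove

ref_image = Image.open("ref_image.jpg")      # hypothetical input path
ref_image_no_bg = remove(ref_image)          # RGBA image, background transparent
ref_image_no_bg.save("ref_image_no_bg.png")  # PNG preserves the alpha channel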
sampler.py CHANGED
@@ -10,10 +10,8 @@ handler = EndpointHandler()
 # Define sample inputs
 inputs = {
     "inputs": {
-        "ref_image_url": "https://media.discordapp.net/attachments/
+        "ref_image_url": "https://media.discordapp.net/attachments/1237667074210267217/1246013998042976276/image.jpg?ex=665ad876&is=665986f6&hm=e7f0e6fd51c1068c15f1a750ca97abb4b2a4bfed396160ff44cf1abecb489d11&=&format=webp&width=896&height=1194",
         "video_url": "https://cdn.discordapp.com/attachments/1237667074210267217/1245971599660679208/pose.mov?ex=665ab0fa&is=66595f7a&hm=63691e23a23ebd8657a10ec708d63a06046a124c3940aa133de22a94aa1fd6c5&",
-        "width": 378,
-        "height": 504,
         "length": 24,
         "num_inference_steps": 25,
         "cfg": 3.5,
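For reference, a sketch of driving the handler end to end and decoding the base64 video it produces. The handler's return statement is outside the visible hunks, so the response key "video" is an assumption; EndpointHandler is treated as callable with the inputs dict, as sampler.py suggests.

# Illustrative usage under the assumptions above.
import base64

result = handler(inputs)      # inputs as defined in sampler.py
video_b64 = result["video"]   # assumed key name for the base64 string
with open("completed_result.mp4", "wb") as f:
    f.write(base64.b64decode(video_b64))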