Spaces:
Paused
Paused
generate animation with frame images
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ with gr.Blocks() as demo:
|
|
| 11 |
with gr.Column():
|
| 12 |
char_imgs = gr.Gallery(type="pil", label="Images of the Character")
|
| 13 |
mocap = gr.Video(label="Motion-Capture Video")
|
|
|
|
| 14 |
poses = gr.JSON(label="Pose Coordinates")
|
| 15 |
tr_steps = gr.Number(label="Training steps", value=10)
|
| 16 |
inf_steps = gr.Number(label="Inference steps", value=10)
|
|
@@ -40,7 +41,7 @@ with gr.Blocks() as demo:
|
|
| 40 |
)
|
| 41 |
|
| 42 |
inference_btn.click(
|
| 43 |
-
run_inference, inputs=[char_imgs, mocap, tr_steps, inf_steps, fps, modelId, img_width, img_height, remove_bg, resize_inputs], outputs=[animation, frames, frames_thumb, pose_coords, reference]
|
| 44 |
)
|
| 45 |
|
| 46 |
generate_frame_btn.click(
|
|
|
|
| 11 |
with gr.Column():
|
| 12 |
char_imgs = gr.Gallery(type="pil", label="Images of the Character")
|
| 13 |
mocap = gr.Video(label="Motion-Capture Video")
|
| 14 |
+
frame_imgs = gr.Gallery(type="pil", label="Reference Images of Each Frame")
|
| 15 |
poses = gr.JSON(label="Pose Coordinates")
|
| 16 |
tr_steps = gr.Number(label="Training steps", value=10)
|
| 17 |
inf_steps = gr.Number(label="Inference steps", value=10)
|
|
|
|
| 41 |
)
|
| 42 |
|
| 43 |
inference_btn.click(
|
| 44 |
+
run_inference, inputs=[char_imgs, mocap, frame_imgs, tr_steps, inf_steps, fps, modelId, img_width, img_height, remove_bg, resize_inputs], outputs=[animation, frames, frames_thumb, pose_coords, reference]
|
| 45 |
)
|
| 46 |
|
| 47 |
generate_frame_btn.click(
|
main.py
CHANGED
|
@@ -285,14 +285,17 @@ def prepare_inputs_train(images, bg_remove, dwpose, rembg_session):
|
|
| 285 |
return in_img, in_pose, train_imgs, train_poses
|
| 286 |
|
| 287 |
|
| 288 |
-
def prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app=False):
|
| 289 |
progress=gr.Progress(track_tqdm=True)
|
| 290 |
|
| 291 |
print("prepare_inputs_inference")
|
| 292 |
|
| 293 |
in_pose, _ = get_pose(in_img, dwpose, "in_pose.png")
|
| 294 |
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
| 296 |
print("remove background", bg_remove)
|
| 297 |
if bg_remove:
|
| 298 |
in_img = removebg(in_img, rembg_session)
|
|
@@ -354,7 +357,7 @@ def prepare_inputs(images, in_vid, fps, bg_remove, dwpose, rembg_session, resize
|
|
| 354 |
|
| 355 |
in_img, in_pose, train_imgs, train_poses = prepare_inputs_train(images, bg_remove, dwpose, rembg_session)
|
| 356 |
|
| 357 |
-
in_img, target_poses_cropped, _, _, _ = prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app)
|
| 358 |
|
| 359 |
|
| 360 |
return in_img, in_pose, train_imgs, train_poses, target_poses_cropped
|
|
@@ -1123,7 +1126,7 @@ def run_train(images, train_steps=100, modelId="fine_tuned_pcdms", bg_remove=Tru
|
|
| 1123 |
train(modelId, in_img, in_pose, train_imgs, train_poses, train_steps, pcdms_model, noise_scheduler, image_encoder_p, image_encoder_g, vae, unet, finetune, is_app)
|
| 1124 |
|
| 1125 |
|
| 1126 |
-
def run_inference(images, video_path, train_steps=100, inference_steps=10, fps=12, modelId="fine_tuned_pcdms", img_width=1920, img_height=1080, bg_remove=True, resize_inputs=True):
|
| 1127 |
finetune=True
|
| 1128 |
is_app=True
|
| 1129 |
|
|
@@ -1135,8 +1138,10 @@ def run_inference(images, video_path, train_steps=100, inference_steps=10, fps=1
|
|
| 1135 |
|
| 1136 |
images = [img[0] for img in images]
|
| 1137 |
in_img = images[0]
|
|
|
|
|
|
|
| 1138 |
|
| 1139 |
-
in_img, target_poses, in_pose, target_poses_coords, orig_frames = prepare_inputs_inference(in_img, video_path, fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app)
|
| 1140 |
|
| 1141 |
results = inference(modelId, in_img, in_pose, target_poses, inference_steps, None, vae, unet, image_encoder_p, is_app)
|
| 1142 |
#urls = save_temp_imgs(results)
|
|
|
|
| 285 |
return in_img, in_pose, train_imgs, train_poses
|
| 286 |
|
| 287 |
|
| 288 |
+
def prepare_inputs_inference(in_img, in_vid, frames, fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app=False):
|
| 289 |
progress=gr.Progress(track_tqdm=True)
|
| 290 |
|
| 291 |
print("prepare_inputs_inference")
|
| 292 |
|
| 293 |
in_pose, _ = get_pose(in_img, dwpose, "in_pose.png")
|
| 294 |
|
| 295 |
+
print(in_vid)
|
| 296 |
+
print(frames)
|
| 297 |
+
if in_vid:
|
| 298 |
+
frames = extract_frames(in_vid, fps)
|
| 299 |
print("remove background", bg_remove)
|
| 300 |
if bg_remove:
|
| 301 |
in_img = removebg(in_img, rembg_session)
|
|
|
|
| 357 |
|
| 358 |
in_img, in_pose, train_imgs, train_poses = prepare_inputs_train(images, bg_remove, dwpose, rembg_session)
|
| 359 |
|
| 360 |
+
in_img, target_poses_cropped, _, _, _ = prepare_inputs_inference(in_img, in_vid, [], fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app)
|
| 361 |
|
| 362 |
|
| 363 |
return in_img, in_pose, train_imgs, train_poses, target_poses_cropped
|
|
|
|
| 1126 |
train(modelId, in_img, in_pose, train_imgs, train_poses, train_steps, pcdms_model, noise_scheduler, image_encoder_p, image_encoder_g, vae, unet, finetune, is_app)
|
| 1127 |
|
| 1128 |
|
| 1129 |
+
def run_inference(images, video_path, frames, train_steps=100, inference_steps=10, fps=12, modelId="fine_tuned_pcdms", img_width=1920, img_height=1080, bg_remove=True, resize_inputs=True):
|
| 1130 |
finetune=True
|
| 1131 |
is_app=True
|
| 1132 |
|
|
|
|
| 1138 |
|
| 1139 |
images = [img[0] for img in images]
|
| 1140 |
in_img = images[0]
|
| 1141 |
+
if frames:
|
| 1142 |
+
frames = [img[0] for img in frames]
|
| 1143 |
|
| 1144 |
+
in_img, target_poses, in_pose, target_poses_coords, orig_frames = prepare_inputs_inference(in_img, video_path, frames, fps, dwpose, rembg_session, bg_remove, resize_inputs, is_app)
|
| 1145 |
|
| 1146 |
results = inference(modelId, in_img, in_pose, target_poses, inference_steps, None, vae, unet, image_encoder_p, is_app)
|
| 1147 |
#urls = save_temp_imgs(results)
|