acmyu committed
Commit 53980f6 · 1 Parent(s): 864de0c

output pose coords

Files changed (2)
  1. app.py +2 -1
  2. main.py +18 -10
app.py CHANGED
@@ -26,6 +26,7 @@ with gr.Blocks() as demo:
     animation = gr.Video(label="Result")
     frames = gr.Gallery(type="pil", label="Frames", format="png")
     frames_thumb = gr.Gallery(type="pil", label="Thumbnails", format="png")
+    pose_coords = gr.JSON(label="Pose Coordinates")
 
     submit_btn.click(
         run_app, inputs=[char_imgs, mocap, tr_steps, inf_steps, fps, remove_bg, resize_inputs], outputs=[animation, frames]
@@ -36,7 +37,7 @@ with gr.Blocks() as demo:
     )
 
     inference_btn.click(
-        run_inference, inputs=[char_imgs, mocap, tr_steps, inf_steps, fps, modelId, img_width, img_height, remove_bg, resize_inputs], outputs=[animation, frames, frames_thumb]
+        run_inference, inputs=[char_imgs, mocap, tr_steps, inf_steps, fps, modelId, img_width, img_height, remove_bg, resize_inputs], outputs=[animation, frames, frames_thumb, pose_coords]
     )
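For context, the UI side of this change is standard Gradio output wiring: a gr.JSON component renders whatever JSON-serializable value the click handler returns for it. A minimal runnable sketch of the pattern follows; fake_inference is a hypothetical stand-in for run_inference, which in the real app also returns the video, frames, and thumbnails.

import gradio as gr

# Hypothetical stand-in for run_inference: returns one JSON string per frame,
# mirroring the target_poses_coords list that main.py now builds.
def fake_inference():
    return ['{"bodies": [[0.51, 0.42]]}', '{"bodies": [[0.52, 0.43]]}']

with gr.Blocks() as demo:
    pose_coords = gr.JSON(label="Pose Coordinates")  # displays any JSON-serializable value
    run_btn = gr.Button("Run inference")
    run_btn.click(fake_inference, inputs=[], outputs=[pose_coords])

demo.launch()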
main.py CHANGED
@@ -58,7 +58,7 @@ import uuid
 import gc
 from numba import cuda
 import requests
-import uuid
+import json
 
 from huggingface_hub import hf_hub_download, HfApi
 
@@ -221,7 +221,7 @@ def get_pose(img, dwpose, outfile, crop=False):
     out_img = out_img.crop(bbox)
     out_img = ImageOps.expand(out_img, border=int(out_img.width*0.2), fill=(0,0,0))
 
-    return out_img
+    return out_img, pose
 
 
 def extract_frames(video_path, fps):
@@ -272,12 +272,13 @@ def prepare_inputs_train(images, bg_remove, dwpose, rembg_session):
     images = [removebg(img, rembg_session) for img in images]
 
     in_img = images[0]
-    in_pose = get_pose(in_img, dwpose, "in_pose.png")
+    in_pose, _ = get_pose(in_img, dwpose, "in_pose.png")
     train_poses = []
     train_imgs = [resize_and_pad(img, in_img) for img in images[1:]]
 
     for i, img in enumerate(train_imgs):
-        train_poses.append(get_pose(img, dwpose, "tr_pose"+str(i)+".png"))
+        train_pose, _ = get_pose(img, dwpose, "tr_pose"+str(i)+".png")
+        train_poses.append(train_pose)
 
     return in_img, in_pose, train_imgs, train_poses
 
@@ -287,7 +288,7 @@ def prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remo
 
     print("prepare_inputs_inference")
 
-    in_pose = get_pose(in_img, dwpose, "in_pose.png")
+    in_pose, _ = get_pose(in_img, dwpose, "in_pose.png")
 
     frames = extract_frames(in_vid, fps)
     print("remove background", bg_remove)
@@ -302,14 +303,21 @@ def prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remo
 
     progress_bar = tqdm(range(len(frames)), initial=0, desc="Frames")
     target_poses = []
+    target_poses_coords = []
     max_left = max_top = 999999
     max_right = max_bottom = 0
     it = frames
     if is_app:
         it = progress.tqdm(frames, desc="Pose Detection")
     for f in it:
-        tpose = get_pose(f, dwpose, "tar_pose"+str(len(target_poses))+".png")
+        tpose, tpose_coords = get_pose(f, dwpose, "tar_pose"+str(len(target_poses))+".png")
+        #print(tpose_coords)
+        coords = {}
+        for k in tpose_coords:
+            coords[k] = tpose_coords[k].tolist()
+        #print(coords)
         target_poses.append(tpose)
+        target_poses_coords.append(json.dumps(coords))
         progress_bar.update(1)
 
         bbox = tpose.getbbox()
@@ -332,14 +340,14 @@ def prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remo
         tpose.save("out/"+"tar_pose"+str(len(target_poses_cropped))+".png")
         target_poses_cropped.append(tpose)
 
-    return in_img, target_poses_cropped, in_pose
+    return in_img, target_poses_cropped, in_pose, target_poses_coords
 
 
 def prepare_inputs(images, in_vid, fps, bg_remove, dwpose, rembg_session, resize='target', is_app=False):
 
     in_img, in_pose, train_imgs, train_poses = prepare_inputs_train(images, bg_remove, dwpose, rembg_session)
 
-    in_img, target_poses_cropped, _ = prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remove, resize, is_app)
+    in_img, target_poses_cropped, _, _ = prepare_inputs_inference(in_img, in_vid, fps, dwpose, rembg_session, bg_remove, resize, is_app)
 
 
     return in_img, in_pose, train_imgs, train_poses, target_poses_cropped
@@ -1125,7 +1133,7 @@ def run_inference(images, video_path, train_steps=100, inference_steps=10, fps=1
     images = [img[0] for img in images]
     in_img = images[0]
 
-    in_img, target_poses, in_pose = prepare_inputs_inference(in_img, video_path, fps, dwpose, rembg_session, bg_remove, 'target', is_app)
+    in_img, target_poses, in_pose, target_poses_coords = prepare_inputs_inference(in_img, video_path, fps, dwpose, rembg_session, bg_remove, 'target', is_app)
 
     results = inference(modelId, in_img, in_pose, target_poses, inference_steps, None, vae, unet, image_encoder_p, is_app)
     #urls = save_temp_imgs(results)
@@ -1143,7 +1151,7 @@ def run_inference(images, video_path, train_steps=100, inference_steps=10, fps=1
 
     print("Done!")
 
-    return out_vid+'.webm', results, getThumbnails(results)
+    return out_vid+'.webm', results, getThumbnails(results), target_poses_coords
 
 
 def run_app(images, video_path, train_steps=100, inference_steps=10, fps=12, bg_remove=False, resize_inputs=True):
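The heart of the main.py change is the new serialization step in prepare_inputs_inference: get_pose now also returns DWPose's raw keypoint data, whose values are numpy arrays and therefore not directly JSON-serializable, so each array is converted with .tolist() before json.dumps. A standalone sketch of that conversion, using an illustrative stand-in dict (the key names and array shapes are assumptions, not taken from the repo):

import json
import numpy as np

# Illustrative stand-in for the dict get_pose now returns alongside the pose
# image; the real DWPose output keys and array shapes may differ.
tpose_coords = {
    "bodies": np.array([[0.51, 0.42], [0.50, 0.55]]),
    "hands": np.array([[0.30, 0.70]]),
}

# numpy arrays cannot be passed to json.dumps directly; .tolist() yields
# nested Python lists, which serialize cleanly.
coords = {k: v.tolist() for k, v in tpose_coords.items()}
frame_json = json.dumps(coords)
print(frame_json)  # {"bodies": [[0.51, 0.42], [0.5, 0.55]], "hands": [[0.3, 0.7]]}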
 
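Since run_inference now returns target_poses_coords as a list of JSON strings (one per frame) rather than a single JSON document, a downstream consumer needs a json.loads per entry to recover numeric data. Roughly, with the same illustrative values as above:

import json
import numpy as np

# One JSON string per video frame, as produced by the loop above.
target_poses_coords = ['{"bodies": [[0.51, 0.42]]}', '{"bodies": [[0.52, 0.43]]}']

for i, frame_json in enumerate(target_poses_coords):
    coords = json.loads(frame_json)
    bodies = np.asarray(coords["bodies"])  # back to an array for numeric work
    print(f"frame {i}: {bodies.shape[0]} body keypoint(s)")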