"""Demo script: estimate 2D poses with HRNet, lift them to 3D with PoseFormerV2,
and render the results side by side as images and an .mp4 video."""

import argparse
import copy
import glob
import os
import sys
from collections import OrderedDict

import cv2
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

from lib.preprocess import h36m_coco_format, revise_kpts
from lib.hrnet.gen_kpts import gen_video_kpts as hrnet_pose

sys.path.append(os.getcwd())

from common.model_poseformer import PoseTransformerV2 as Model
from common.camera import *  # provides normalize_screen_coordinates and camera_to_world

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the '3d' projection)
import matplotlib.gridspec as gridspec

plt.switch_backend('agg')
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42


def show2Dpose(kps, img):
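    """Draw the 17-joint H3.6M skeleton on an image (BGR, OpenCV convention)."""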
    connections = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5],
                   [5, 6], [0, 7], [7, 8], [8, 9], [9, 10],
                   [8, 11], [11, 12], [12, 13], [8, 14], [14, 15], [15, 16]]

    # True entries pick lcolor, False entries rcolor, one flag per connection
    LR = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=bool)

    lcolor = (255, 0, 0)
    rcolor = (0, 0, 255)
    thickness = 3

    for j, c in enumerate(connections):
        start = list(map(int, kps[c[0]]))
        end = list(map(int, kps[c[1]]))
        cv2.line(img, (start[0], start[1]), (end[0], end[1]), lcolor if LR[j] else rcolor, thickness)
        cv2.circle(img, (start[0], start[1]), thickness=-1, color=(0, 255, 0), radius=3)
        cv2.circle(img, (end[0], end[1]), thickness=-1, color=(0, 255, 0), radius=3)

    return img


def show3Dpose(vals, ax):
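    """Render a 3D pose (17x3 array) on a matplotlib 3D axis, colour-coded left/right."""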
    ax.view_init(elev=15., azim=70)

    lcolor = (0, 0, 1)
    rcolor = (1, 0, 0)

    # Bones as (parent I, child J) index pairs into the 17-joint skeleton
    I = np.array([0, 0, 1, 4, 2, 5, 0, 7, 8, 8, 14, 15, 11, 12, 8, 9])
    J = np.array([1, 4, 2, 5, 3, 6, 7, 8, 14, 11, 15, 16, 12, 13, 9, 10])

    LR = np.array([0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0], dtype=bool)

    for i in np.arange(len(I)):
        x, y, z = [np.array([vals[I[i], j], vals[J[i], j]]) for j in range(3)]
        ax.plot(x, y, z, lw=2, color=lcolor if LR[i] else rcolor)

    RADIUS = 0.72
    RADIUS_Z = 0.7

    xroot, yroot, zroot = vals[0, 0], vals[0, 1], vals[0, 2]
    ax.set_xlim3d([-RADIUS + xroot, RADIUS + xroot])
    ax.set_ylim3d([-RADIUS + yroot, RADIUS + yroot])
    ax.set_zlim3d([-RADIUS_Z + zroot, RADIUS_Z + zroot])
    ax.set_aspect('auto')

    # Fully transparent panes and hidden tick labels for a cleaner render
    white = (1.0, 1.0, 1.0, 0.0)
    ax.xaxis.set_pane_color(white)
    ax.yaxis.set_pane_color(white)
    ax.zaxis.set_pane_color(white)

    ax.tick_params('x', labelbottom=False)
    ax.tick_params('y', labelleft=False)
    ax.tick_params('z', labelleft=False)


def get_pose2D(video_path, output_dir):
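    """Run HRNet 2D pose estimation on the video and save the keypoints as an .npz file."""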
    print('\nGenerating 2D pose...')
    # note: 'num_peroson' is the parameter name used by lib.hrnet.gen_kpts
    keypoints, scores = hrnet_pose(video_path, det_dim=416, num_peroson=1, gen_output=True)
    keypoints, scores, valid_frames = h36m_coco_format(keypoints, scores)
    re_kpts = revise_kpts(keypoints, scores, valid_frames)
    print('Generating 2D pose successful!')

    output_dir += 'input_2D/'
    os.makedirs(output_dir, exist_ok=True)

    output_npz = output_dir + 'keypoints.npz'
    np.savez_compressed(output_npz, reconstruction=keypoints)


def img2video(video_path, output_dir):
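    """Stitch the rendered frames in output_dir/pose/ into an .mp4 video."""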
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) + 5  # written slightly faster than the source fps
    cap.release()

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")

    names = sorted(glob.glob(os.path.join(output_dir + 'pose/', '*.png')))
    img = cv2.imread(names[0])
    size = (img.shape[1], img.shape[0])

    # Derive the clip name from video_path rather than relying on a global set in __main__
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    videoWrite = cv2.VideoWriter(output_dir + video_name + '.mp4', fourcc, fps, size)

    for name in names:
        img = cv2.imread(name)
        videoWrite.write(img)

    videoWrite.release()


def showimage(ax, img):
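    """Show an image on a matplotlib axis with all ticks and axes hidden."""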
    ax.set_xticks([])
    ax.set_yticks([])
    plt.axis('off')
    ax.imshow(img)


def get_pose3D(video_path, output_dir):
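    """Lift the saved 2D keypoints to 3D with PoseFormerV2 and render per-frame results.

    Loads output_dir/input_2D/keypoints.npz, runs the 243-frame model with
    horizontal-flip test-time augmentation, and writes pose2D/, pose3D/ and
    keypoints_3D.npz under output_dir.
    """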
    # Hyper-parameters matching the released 27_243_45.2 checkpoint
    args = argparse.Namespace()
    args.embed_dim_ratio, args.depth, args.frames = 32, 4, 243
    args.number_of_kept_frames, args.number_of_kept_coeffs = 27, 27
    args.pad = (args.frames - 1) // 2
    args.previous_dir = 'checkpoint/'
    args.n_joints, args.out_joints = 17, 17

    cuda_available = torch.cuda.is_available()
    print(f"CUDA available in get_pose3D: {cuda_available}")
    if cuda_available:
        print(f"CUDA device count: {torch.cuda.device_count()}")
        print(f"CUDA device name: {torch.cuda.get_device_name(0)}")

    device = torch.device('cuda' if cuda_available else 'cpu')
    print(f"Using device: {device}")

    base_model = Model(args=args)

    # Wrap in DataParallel only on GPU; the 'module.' key prefix this introduces
    # is reconciled against the checkpoint below
    if cuda_available:
        model = nn.DataParallel(base_model).to(device)
    else:
        model = base_model.to(device)

    # Look for the released checkpoint in the usual locations
    if os.path.exists("./demo/lib/checkpoint/27_243_45.2.bin"):
        model_path = "./demo/lib/checkpoint/27_243_45.2.bin"
    elif os.path.exists("./lib/checkpoint/27_243_45.2.bin"):
        model_path = "./lib/checkpoint/27_243_45.2.bin"
    else:
        model_path = "./checkpoint/27_243_45.2.bin"

    pre_dict = torch.load(model_path, map_location=device, weights_only=False)

    state_dict = pre_dict['model_pos']
    new_state_dict = OrderedDict()

    # Reconcile the 'module.' prefix between the checkpoint keys and the model
    checkpoint_has_module = any(k.startswith('module.') for k in state_dict.keys())
    model_has_module = isinstance(model, nn.DataParallel)

    if checkpoint_has_module and not model_has_module:
        # Checkpoint was saved from a DataParallel model: strip the prefix
        for k, v in state_dict.items():
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = v
    elif not checkpoint_has_module and model_has_module:
        # Model is wrapped in DataParallel: add the prefix
        for k, v in state_dict.items():
            name = 'module.' + k if not k.startswith('module.') else k
            new_state_dict[name] = v
    else:
        new_state_dict = state_dict

    model.load_state_dict(new_state_dict, strict=True)
    model.eval()

    keypoints = np.load(output_dir + 'input_2D/keypoints.npz', allow_pickle=True)['reconstruction']

    cap = cv2.VideoCapture(video_path)
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print('\nGenerating 3D pose...')
    keypoints_3D = []
    for i in tqdm(range(video_length)):
        ret, img = cap.read()
        if not ret or img is None:
            continue
        img_size = img.shape

        # Take a window of args.frames 2D poses centred on frame i,
        # edge-padding at the start and end of the clip
        start = max(0, i - args.pad)
        end = min(i + args.pad, len(keypoints[0]) - 1)

        input_2D_no = keypoints[0][start:end + 1]

        left_pad, right_pad = 0, 0
        if input_2D_no.shape[0] != args.frames:
            if i < args.pad:
                left_pad = args.pad - i
            if i > len(keypoints[0]) - args.pad - 1:
                right_pad = i + args.pad - (len(keypoints[0]) - 1)

            input_2D_no = np.pad(input_2D_no, ((left_pad, right_pad), (0, 0), (0, 0)), 'edge')

        joints_left = [4, 5, 6, 11, 12, 13]
        joints_right = [1, 2, 3, 14, 15, 16]

        input_2D = normalize_screen_coordinates(input_2D_no, w=img_size[1], h=img_size[0])

        # Test-time augmentation: a horizontally flipped copy with left/right joints swapped
        input_2D_aug = copy.deepcopy(input_2D)
        input_2D_aug[:, :, 0] *= -1
        input_2D_aug[:, joints_left + joints_right] = input_2D_aug[:, joints_right + joints_left]
        input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0)

        input_2D = input_2D[np.newaxis, :, :, :, :]
        input_2D = torch.from_numpy(input_2D.astype('float32')).to(device)

        with torch.no_grad():
            output_3D_non_flip = model(input_2D[:, 0])
            output_3D_flip = model(input_2D[:, 1])

            # Un-flip the augmented prediction and average the two estimates
            output_3D_flip[:, :, :, 0] *= -1
            output_3D_flip[:, :, joints_left + joints_right, :] = output_3D_flip[:, :, joints_right + joints_left, :]
            output_3D = (output_3D_non_flip + output_3D_flip) / 2

        # Root-centre the prediction at the pelvis (joint 0)
        output_3D[:, :, 0, :] = 0
        post_out = output_3D[0, 0].cpu().detach().numpy()
        keypoints_3D.append(post_out)

        # Rotate from camera to world coordinates and rest the pose on the ground plane
        rot = [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088]
        rot = np.array(rot, dtype='float32')
        post_out = camera_to_world(post_out, R=rot, t=0)
        post_out[:, 2] -= np.min(post_out[:, 2])

        # The centre frame of the padded window corresponds to frame i
        input_2D_no = input_2D_no[args.pad]

        image = show2Dpose(input_2D_no, copy.deepcopy(img))

        output_dir_2D = output_dir + 'pose2D/'
        os.makedirs(output_dir_2D, exist_ok=True)
        cv2.imwrite(output_dir_2D + str(('%04d' % i)) + '_2D.png', image)

        fig = plt.figure(figsize=(9.6, 5.4))
        gs = gridspec.GridSpec(1, 1)
        gs.update(wspace=-0.00, hspace=0.05)
        ax = plt.subplot(gs[0], projection='3d')
        show3Dpose(post_out, ax)

        output_dir_3D = output_dir + 'pose3D/'
        os.makedirs(output_dir_3D, exist_ok=True)
        plt.savefig(output_dir_3D + str(('%04d' % i)) + '_3D.png', dpi=200, format='png', bbox_inches='tight')
        plt.clf()
        plt.close(fig)

    cap.release()

    output_npz = output_dir + 'keypoints_3D.npz'
    np.savez_compressed(output_npz, reconstruction=keypoints_3D)
    print('Generating 3D pose successful!')

    # Stitch the 2D input and the 3D reconstruction side by side for each frame
    image_2d_dir = sorted(glob.glob(os.path.join(output_dir_2D, '*.png')))
    image_3d_dir = sorted(glob.glob(os.path.join(output_dir_3D, '*.png')))

    print('\nGenerating demo...')
    for i in tqdm(range(len(image_2d_dir))):
        image_2d = plt.imread(image_2d_dir[i])
        image_3d = plt.imread(image_3d_dir[i])

        # Crop the 2D frame to a square
        edge = (image_2d.shape[1] - image_2d.shape[0]) // 2
        image_2d = image_2d[:, edge:image_2d.shape[1] - edge]

        # Trim the margins around the 3D plot
        edge = 130
        image_3d = image_3d[edge:image_3d.shape[0] - edge, edge:image_3d.shape[1] - edge]

        font_size = 12
        fig = plt.figure(figsize=(15.0, 5.4))
        ax = plt.subplot(121)
        showimage(ax, image_2d)
        ax.set_title("Input", fontsize=font_size)

        ax = plt.subplot(122)
        showimage(ax, image_3d)
        ax.set_title("Reconstruction", fontsize=font_size)

        output_dir_pose = output_dir + 'pose/'
        os.makedirs(output_dir_pose, exist_ok=True)
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(output_dir_pose + str(('%04d' % i)) + '_pose.png', dpi=200, bbox_inches='tight')
        plt.clf()
        plt.close(fig)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--video', type=str, default='sample_video.mp4', help='input video')
    parser.add_argument('--gpu', type=str, default='0', help='GPU device ID (set CUDA_VISIBLE_DEVICES before running if needed)')
    args = parser.parse_args()

    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA device count: {torch.cuda.device_count()}")
        print(f"Current device: {torch.cuda.current_device()}")
        print(f"Device name: {torch.cuda.get_device_name(0)}")
        if "CUDA_VISIBLE_DEVICES" in os.environ:
            print(f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']}")
    else:
        print("WARNING: CUDA is not available!")
        print("This might be because:")
        print(" 1. CUDA_VISIBLE_DEVICES was set incorrectly")
        print(" 2. PyTorch was installed without CUDA support")
        print(" 3. GPU drivers are not installed")
        print("\nTo use GPU, set CUDA_VISIBLE_DEVICES BEFORE running Python:")
        print(" PowerShell: $env:CUDA_VISIBLE_DEVICES='0'")
        print(" Bash: export CUDA_VISIBLE_DEVICES=0")
        print("\nOr don't set it at all to use the default GPU")

    video_path = './demo/video/' + args.video
    video_name = video_path.split('/')[-1].split('.')[0]
    output_dir = './demo/output/' + video_name + '/'

    get_pose2D(video_path, output_dir)
    get_pose3D(video_path, output_dir)
    img2video(video_path, output_dir)
    print('Generating demo successful!')