Spaces:
Build error
Build error
| import argparse | |
| import datetime | |
| import random | |
| import time | |
| from pathlib import Path | |
| from tqdm import tqdm | |
| import torch | |
| import torchvision.transforms as standard_transforms | |
| import numpy as np | |
| from PIL import Image | |
| import cv2 | |
| from crowd_datasets import build_dataset | |
| from engine import * | |
| from models import build_model | |
| import os | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| def get_args_parser(): | |
| parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False) | |
| # * Backbone | |
| parser.add_argument('--backbone', default='vgg16_bn', type=str, | |
| help="name of the convolutional backbone to use") | |
| parser.add_argument('--input_video', default='../Video-tests/test1.mp4', type=str, | |
| help="address of input video file") | |
| parser.add_argument('--row', default=2, type=int, | |
| help="row number of anchor points") | |
| parser.add_argument('--line', default=2, type=int, | |
| help="line number of anchor points") | |
| parser.add_argument('--output_dir', default='./logs/', | |
| help='path where to save') | |
| parser.add_argument('--weight_path', default='./weights/SHTechA.pth', | |
| help='path where the trained weights saved') | |
| parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation') | |
| return parser | |
| def load_model(args): | |
| os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id) | |
| print(args) | |
| device = torch.device('cpu') | |
| # get the P2PNet | |
| model = build_model(args) | |
| # move to GPU | |
| model.to(device) | |
| # load trained model | |
| if args.weight_path is not None: | |
| checkpoint = torch.load(args.weight_path, map_location='cpu') | |
| model.load_state_dict(checkpoint['model']) | |
| # convert to eval mode | |
| model.eval() | |
| # create the pre-processing transform | |
| transform = standard_transforms.Compose([ | |
| standard_transforms.ToTensor(), | |
| standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), | |
| ]) | |
| return model, transform, device | |
| def video_reader(videoFile): | |
| cap = cv2.VideoCapture(videoFile) | |
| while(cap.isOpened()): | |
| ret,cv2_im = cap.read() | |
| if ret: | |
| converted = cv2.cvtColor(cv2_im,cv2.COLOR_BGR2RGB) | |
| pil_im = Image.fromarray(converted) | |
| yield pil_im | |
| elif not ret: | |
| break | |
| cap.release() | |
| def main(args, debug=False): | |
| result = [] | |
| model, transform, device = load_model(args) | |
| for frame in tqdm(video_reader(args.input_video)): | |
| img_raw = frame | |
| # round the size | |
| width, height = img_raw.size | |
| new_width = width // 128 * 128 | |
| new_height = height // 128 * 128 | |
| img_raw = img_raw.resize((new_width, new_height), Image.ANTIALIAS) | |
| frames_size = (new_width, new_height) | |
| # pre-proccessing | |
| img = transform(img_raw) | |
| samples = torch.Tensor(img).unsqueeze(0) | |
| samples = samples.to(device) | |
| # run inference | |
| outputs = model(samples) | |
| outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0] | |
| outputs_points = outputs['pred_points'][0] | |
| threshold = 0.5 | |
| # filter the predictions | |
| points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist() | |
| predict_cnt = int((outputs_scores > threshold).sum()) | |
| outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0] | |
| outputs_points = outputs['pred_points'][0] | |
| # draw the predictions | |
| size = 10 | |
| img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR) | |
| for p in points: | |
| img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1) | |
| # save the visualized image | |
| # cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw) | |
| # break | |
| if result: | |
| result.write(img_to_draw) | |
| else: | |
| result = cv2.VideoWriter(f'{args.output_dir}pred_{args.input_video}.avi', | |
| cv2.VideoWriter_fourcc(*'MJPG'), | |
| 10, frames_size) | |
| result.write(img_to_draw) | |
| result.release() | |
| if __name__ == '__main__': | |
| parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()]) | |
| args = parser.parse_args() | |
| main(args) |