|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
|
import numpy as np
|
|
|
import onnxruntime
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
from mmengine import Config
|
|
|
from mmengine.registry import init_default_scope
|
|
|
from mmengine.runner import load_checkpoint
|
|
|
from mmengine.structures import LabelData
|
|
|
|
|
|
from mmaction.registry import MODELS
|
|
|
from mmaction.structures import ActionDataSample
|
|
|
|
|
|
|
|
|
def parse_args():
|
|
|
parser = argparse.ArgumentParser(description='Get model flops and params')
|
|
|
parser.add_argument('config', help='config file path')
|
|
|
parser.add_argument('checkpoint', help='checkpoint file')
|
|
|
parser.add_argument(
|
|
|
'--num_frames', type=int, default=48, help='number of input frames.')
|
|
|
parser.add_argument(
|
|
|
'--image_size', type=int, default=64, help='size of the frame')
|
|
|
parser.add_argument(
|
|
|
'--num_joints',
|
|
|
type=int,
|
|
|
default=0,
|
|
|
help='number of joints. If not given, will use default settings from'
|
|
|
'the config file')
|
|
|
parser.add_argument(
|
|
|
'--device', type=str, default='cpu', help='CPU/CUDA device option')
|
|
|
parser.add_argument(
|
|
|
'--output_file',
|
|
|
type=str,
|
|
|
default='posec3d.onnx',
|
|
|
help='file name of the output onnx file')
|
|
|
args = parser.parse_args()
|
|
|
return args
|
|
|
|
|
|
|
|
|
class AvgPool3d(nn.Module):
|
|
|
|
|
|
def forward(self, x):
|
|
|
return x.mean(dim=(-1, -2, -3), keepdims=True)
|
|
|
|
|
|
|
|
|
class MaxPool3d(nn.Module):
|
|
|
|
|
|
def forward(self, x):
|
|
|
x = x.max(dim=-1, keepdim=True)[0]
|
|
|
x = x.max(dim=-2, keepdim=True)[0]
|
|
|
x = x.max(dim=-3, keepdim=True)[0]
|
|
|
return x
|
|
|
|
|
|
|
|
|
class GCNNet(nn.Module):
|
|
|
|
|
|
def __init__(self, base_model):
|
|
|
super(GCNNet, self).__init__()
|
|
|
self.backbone = base_model.backbone
|
|
|
self.head = base_model.cls_head
|
|
|
|
|
|
if hasattr(self.head, 'pool'):
|
|
|
pool = self.head.pool
|
|
|
if isinstance(pool, nn.AdaptiveAvgPool3d):
|
|
|
assert pool.output_size == 1
|
|
|
self.head.pool = AvgPool3d()
|
|
|
elif isinstance(pool, nn.AdaptiveMaxPool3d):
|
|
|
assert pool.output_size == 1
|
|
|
self.head.pool = MaxPool3d()
|
|
|
|
|
|
def forward(self, input_tensor):
|
|
|
feat = self.backbone(input_tensor)
|
|
|
cls_score = self.head(feat)
|
|
|
return cls_score
|
|
|
|
|
|
|
|
|
def softmax(x):
|
|
|
x = np.exp(x - x.max())
|
|
|
return x / x.sum()
|
|
|
|
|
|
|
|
|
def main():
|
|
|
args = parse_args()
|
|
|
config = Config.fromfile(args.config)
|
|
|
|
|
|
if config.model.type != 'RecognizerGCN':
|
|
|
print('This script serves the sole purpose of converting PoseC3D '
|
|
|
'skeleton models in MMAction2 to ONNX files. Please note that '
|
|
|
'attempting to convert other models using this script may not '
|
|
|
'yield successful results.\n\n')
|
|
|
|
|
|
init_default_scope(config.get('default_scope', 'mmaction'))
|
|
|
|
|
|
base_model = MODELS.build(config.model)
|
|
|
load_checkpoint(base_model, args.checkpoint, map_location='cpu')
|
|
|
base_model.to(args.device)
|
|
|
|
|
|
num_joints = args.num_joints
|
|
|
image_size = args.image_size
|
|
|
num_frames = args.num_frames
|
|
|
if num_joints == 0:
|
|
|
num_joints = config.model.backbone.in_channels
|
|
|
|
|
|
input_tensor = torch.randn(1, num_joints, num_frames, image_size,
|
|
|
image_size)
|
|
|
input_tensor = input_tensor.clamp(-3, 3).to(args.device)
|
|
|
|
|
|
base_model.eval()
|
|
|
|
|
|
data_sample = ActionDataSample()
|
|
|
data_sample.pred_scores = LabelData()
|
|
|
data_sample.pred_labels = LabelData()
|
|
|
base_output = base_model(
|
|
|
input_tensor.unsqueeze(0), data_samples=[data_sample],
|
|
|
mode='predict')[0]
|
|
|
base_output = base_output.pred_score.detach().cpu().numpy()
|
|
|
|
|
|
model = GCNNet(base_model).to(args.device)
|
|
|
model.eval()
|
|
|
|
|
|
torch.onnx.export(
|
|
|
model, (input_tensor),
|
|
|
args.output_file,
|
|
|
input_names=['input_tensor'],
|
|
|
output_names=['cls_score'],
|
|
|
export_params=True,
|
|
|
do_constant_folding=True,
|
|
|
verbose=False,
|
|
|
opset_version=11,
|
|
|
dynamic_axes={
|
|
|
'input_tensor': {
|
|
|
0: 'batch_size',
|
|
|
2: 'num_frames'
|
|
|
},
|
|
|
'cls_score': {
|
|
|
0: 'batch_size'
|
|
|
}
|
|
|
})
|
|
|
|
|
|
print(f'Successfully export the onnx file to {args.output_file}')
|
|
|
|
|
|
|
|
|
session = onnxruntime.InferenceSession(args.output_file)
|
|
|
input_feed = {'input_tensor': input_tensor.cpu().data.numpy()}
|
|
|
outputs = session.run(['cls_score'], input_feed=input_feed)
|
|
|
output = softmax(outputs[0][0])
|
|
|
|
|
|
diff = abs(base_output - output).max()
|
|
|
if diff < 1e-5:
|
|
|
print('The output difference is smaller than 1e-5.')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
main()
|
|
|
|