Upload convert_model.py

convert_model.py (CHANGED, +30 -10)
@@ -39,9 +39,9 @@ class ImageEncoder(torch.nn.Module):
 
 def convert_model(opts):
     src_model = uform.get_model(opts.model_name)
-    input_ids = torch.ones(1, …
-    attention_mask = torch.ones(1, …
-    image = torch.ones(1, 3, …
+    input_ids = torch.ones(1, src_model.text_encoder.max_position_embeddings, dtype=torch.int32)
+    attention_mask = torch.ones(1, src_model.text_encoder.max_position_embeddings, dtype=torch.int32)
+    image = torch.ones(1, 3, src_model.image_encoder.image_size, src_model.image_encoder.image_size, dtype=torch.float32)
 
     print('Tracing models…')
     image_encoder = ImageEncoder(src_model.image_encoder).eval()
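The dummy tensors exist only to give the tracing step concrete shapes; the batch size they hard-code is re-declared as flexible at conversion time below. A minimal sketch of tracing with torch.jit.trace, using a toy module standing in for the script's ImageEncoder wrapper and an assumed 224x224 image_size:

import torch

# Toy stand-in for the script's ImageEncoder wrapper (structure assumed).
class ToyImageEncoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, kernel_size=3)

    def forward(self, image):
        return self.conv(image)

encoder = ToyImageEncoder().eval()
# Batch size 1 is only a tracing placeholder; ct.RangeDim later makes the
# batch dimension flexible in the converted Core ML model.
image = torch.ones(1, 3, 224, 224, dtype=torch.float32)
traced = torch.jit.trace(encoder, image)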
@@ -51,13 +51,18 @@ def convert_model(opts):
 
     print('Converting models…')
 
+    if opts.image_batchsize_lb == opts.image_batchsize_ub:
+        image_batch_dim_shape = opts.image_batchsize_lb
+    else:
+        image_batch_dim_shape = ct.RangeDim(lower_bound=opts.image_batchsize_lb, upper_bound=opts.image_batchsize_ub, default=1)
+
     image_encoder = ct.convert(
         image_encoder,
         convert_to='mlprogram',
         inputs=[
             ct.TensorType(
                 name='image',
-                shape=(…
+                shape=(image_batch_dim_shape,) + image.shape[1:],
                 dtype=image.numpy().dtype
             )],
         outputs=[
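Collapsing the range to a plain integer when the bounds coincide avoids declaring a flexible dimension at all; otherwise ct.RangeDim marks the batch axis as variable between the two bounds. A sketch of the same pattern in isolation (the bounds and image size are assumed values):

import numpy as np
import coremltools as ct

lb, ub = 1, 8  # assumed; the script reads --image_batchsize_lb/--image_batchsize_ub

# Fixed integer when the bounds coincide, flexible range otherwise.
batch_dim = lb if lb == ub else ct.RangeDim(lower_bound=lb, upper_bound=ub, default=1)

image_input = ct.TensorType(
    name='image',
    shape=(batch_dim, 3, 224, 224),  # 224x224 is an assumed image_size
    dtype=np.float32,
)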
@@ -71,18 +76,23 @@ def convert_model(opts):
         compute_precision=ct.precision.FLOAT16 if opts.use_fp16 else ct.precision.FLOAT32
     )
 
+    if opts.text_batchsize_lb == opts.text_batchsize_ub:
+        text_batch_dim_shape = opts.text_batchsize_lb
+    else:
+        text_batch_dim_shape = ct.RangeDim(lower_bound=opts.text_batchsize_lb, upper_bound=opts.text_batchsize_ub, default=1)
+
     text_encoder = ct.convert(
         text_encoder,
         convert_to='mlprogram',
         inputs=[
             ct.TensorType(
                 name='input_ids',
-                shape=(…
+                shape=(text_batch_dim_shape,) + input_ids.shape[1:],
                 dtype=input_ids.numpy().dtype
             ),
             ct.TensorType(
                 name='attention_mask',
-                shape=(…
+                shape=(text_batch_dim_shape,) + attention_mask.shape[1:],
                 dtype=attention_mask.numpy().dtype
             )],
         outputs=[
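The text encoder declares two inputs, and both reuse the same text_batch_dim_shape object, so input_ids and attention_mask advertise identical batch dimensions. A sketch of just those declarations, with an assumed sequence length standing in for max_position_embeddings:

import numpy as np
import coremltools as ct

batch_dim = ct.RangeDim(lower_bound=1, upper_bound=8, default=1)  # assumed bounds
seq_len = 77  # assumed max_position_embeddings

text_inputs = [
    ct.TensorType(name='input_ids', shape=(batch_dim, seq_len), dtype=np.int32),
    ct.TensorType(name='attention_mask', shape=(batch_dim, seq_len), dtype=np.int32),
]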
@@ -110,15 +120,25 @@ if __name__ == '__main__':
                       type=str,
                       help='UForm model name')
 
-    opts.add_argument('--…
+    opts.add_argument('--text_batchsize_lb',
+                      action='store',
+                      type=int,
+                      help='lower bound of batch size for text encoder')
+
+    opts.add_argument('--text_batchsize_ub',
+                      action='store',
+                      type=int,
+                      help='upper bound of batch size for text encoder')
+
+    opts.add_argument('--image_batchsize_lb',
                       action='store',
                       type=int,
-                      help='lower bound of batch size')
+                      help='lower bound of batch size for image encoder')
 
-    opts.add_argument('--…
+    opts.add_argument('--image_batchsize_ub',
                       action='store',
                       type=int,
-                      help='upper bound of batch size')
+                      help='upper bound of batch size for image encoder')
 
     opts.add_argument('-use_fp16',
                       action='store_true',
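The four new flags are plain optional ints with no defaults, so leaving one out yields None; in particular, omitting both bounds for an encoder makes the lb == ub check true and sets that batch dimension to None. A quick self-contained check of the parsing (flag names from the diff, values assumed):

import argparse

opts = argparse.ArgumentParser()
opts.add_argument('--text_batchsize_lb', action='store', type=int,
                  help='lower bound of batch size for text encoder')
opts.add_argument('--text_batchsize_ub', action='store', type=int,
                  help='upper bound of batch size for text encoder')

args = opts.parse_args(['--text_batchsize_lb', '1', '--text_batchsize_ub', '8'])
assert args.text_batchsize_lb == 1 and args.text_batchsize_ub == 8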