added model

Browse files

Files changed (3) hide show

whisper_gpu_int4_gpu-cuda_metrics.json +7 -0
whisper_gpu_int4_gpu-cuda_model.json +212 -0
whisper_gpu_int4_gpu-cuda_model.onnx +3 -0

whisper_gpu_int4_gpu-cuda_metrics.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "latency-avg": {
+        "value": 404.89314,
+        "priority": 1,
+        "higher_is_better": false
+    }
+}

whisper_gpu_int4_gpu-cuda_model.json ADDED Viewed

	@@ -0,0 +1,212 @@

+{
+    "type": "ONNXModel",
+    "config": {
+        "model_path": "E:\\Work\\Dimq1\\source\\OpenAI\\Olive\\examples\\whisper\\models\\conversion-onnx_dynamic_quantization-insert_beam_search-prepost\\whisper_gpu_int4_gpu-cuda_model.onnx",
+        "onnx_file_name": null,
+        "inference_settings": null,
+        "use_ort_extensions": true,
+        "model_attributes": {
+            "vocab_size": 51865,
+            "num_mel_bins": 80,
+            "d_model": 384,
+            "encoder_layers": 4,
+            "encoder_attention_heads": 6,
+            "decoder_layers": 4,
+            "decoder_attention_heads": 6,
+            "decoder_ffn_dim": 1536,
+            "encoder_ffn_dim": 1536,
+            "dropout": 0.0,
+            "attention_dropout": 0.0,
+            "activation_dropout": 0.0,
+            "activation_function": "gelu",
+            "init_std": 0.02,
+            "encoder_layerdrop": 0.0,
+            "decoder_layerdrop": 0.0,
+            "use_cache": true,
+            "num_hidden_layers": 4,
+            "scale_embedding": false,
+            "max_source_positions": 1500,
+            "max_target_positions": 448,
+            "classifier_proj_size": 256,
+            "use_weighted_layer_sum": false,
+            "apply_spec_augment": false,
+            "mask_time_prob": 0.05,
+            "mask_time_length": 10,
+            "mask_time_min_masks": 2,
+            "mask_feature_prob": 0.0,
+            "mask_feature_length": 10,
+            "mask_feature_min_masks": 0,
+            "median_filter_width": 7,
+            "return_dict": true,
+            "output_hidden_states": false,
+            "output_attentions": false,
+            "torchscript": false,
+            "torch_dtype": "float32",
+            "use_bfloat16": false,
+            "tf_legacy_loss": false,
+            "pruned_heads": {},
+            "tie_word_embeddings": true,
+            "is_encoder_decoder": true,
+            "is_decoder": false,
+            "cross_attention_hidden_size": null,
+            "add_cross_attention": false,
+            "tie_encoder_decoder": false,
+            "max_length": 448,
+            "min_length": 0,
+            "do_sample": false,
+            "early_stopping": false,
+            "num_beams": 1,
+            "num_beam_groups": 1,
+            "diversity_penalty": 0.0,
+            "temperature": 1.0,
+            "top_k": 50,
+            "top_p": 1.0,
+            "typical_p": 1.0,
+            "repetition_penalty": 1.0,
+            "length_penalty": 1.0,
+            "no_repeat_ngram_size": 0,
+            "encoder_no_repeat_ngram_size": 0,
+            "bad_words_ids": null,
+            "num_return_sequences": 1,
+            "chunk_size_feed_forward": 0,
+            "output_scores": false,
+            "return_dict_in_generate": false,
+            "forced_bos_token_id": null,
+            "forced_eos_token_id": null,
+            "remove_invalid_values": false,
+            "exponential_decay_length_penalty": null,
+            "suppress_tokens": [
+                1,
+                2,
+                7,
+                8,
+                9,
+                10,
+                14,
+                25,
+                26,
+                27,
+                28,
+                29,
+                31,
+                58,
+                59,
+                60,
+                61,
+                62,
+                63,
+                90,
+                91,
+                92,
+                93,
+                359,
+                503,
+                522,
+                542,
+                873,
+                893,
+                902,
+                918,
+                922,
+                931,
+                1350,
+                1853,
+                1982,
+                2460,
+                2627,
+                3246,
+                3253,
+                3268,
+                3536,
+                3846,
+                3961,
+                4183,
+                4667,
+                6585,
+                6647,
+                7273,
+                9061,
+                9383,
+                10428,
+                10929,
+                11938,
+                12033,
+                12331,
+                12562,
+                13793,
+                14157,
+                14635,
+                15265,
+                15618,
+                16553,
+                16604,
+                18362,
+                18956,
+                20075,
+                21675,
+                22520,
+                26130,
+                26161,
+                26435,
+                28279,
+                29464,
+                31650,
+                32302,
+                32470,
+                36865,
+                42863,
+                47425,
+                49870,
+                50254,
+                50258,
+                50358,
+                50359,
+                50360,
+                50361,
+                50362
+            ],
+            "begin_suppress_tokens": [
+                220,
+                50257
+            ],
+            "architectures": [
+                "WhisperForConditionalGeneration"
+            ],
+            "finetuning_task": null,
+            "id2label": {
+                "0": "LABEL_0",
+                "1": "LABEL_1"
+            },
+            "label2id": {
+                "LABEL_0": 0,
+                "LABEL_1": 1
+            },
+            "tokenizer_class": null,
+            "prefix": null,
+            "bos_token_id": 50257,
+            "pad_token_id": 50257,
+            "eos_token_id": 50257,
+            "sep_token_id": null,
+            "decoder_start_token_id": 50258,
+            "task_specific_params": null,
+            "problem_type": null,
+            "_name_or_path": "openai/whisper-tiny",
+            "transformers_version": "4.35.2",
+            "forced_decoder_ids": [
+                [
+                    1,
+                    50259
+                ],
+                [
+                    2,
+                    50359
+                ],
+                [
+                    3,
+                    50363
+                ]
+            ],
+            "model_type": "whisper"
+        }
+    }
+}

whisper_gpu_int4_gpu-cuda_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6b5eb5028a05fe420d5342e41f72b0fe106f4f8b80630febe0f7960adc33090
+size 112489363