Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

audio2m_speech_2nodes/README.md +10 -0
audio2m_speech_2nodes/config.json +143 -0
audio2m_speech_2nodes/model.safetensors +3 -0
audio2m_speech_4nodes/README.md +10 -0
audio2m_speech_4nodes/config.json +143 -0
audio2m_speech_4nodes/model.safetensors +3 -0

audio2m_speech_2nodes/README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: [More Information Needed]
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

audio2m_speech_2nodes/config.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "label_rate": 25,
+  "num_codebooks": 8,
+  "codebook_dim": 16,
+  "codebook_size": 2048,
+  "features": [
+    "melspec_2048"
+  ],
+  "hop_length": 240,
+  "n_mels": 128,
+  "conv_dim": 512,
+  "encoder_dim": 1024,
+  "encoder_depth": 12,
+  "mask_hop": 0.4,
+  "mask_prob": 0.6,
+  "is_flash": false,
+  "stat": {
+    "melspec_2048_cnt": 14282760192,
+    "melspec_2048_mean": 6.768444971712967,
+    "melspec_2048_std": 18.417922652295623
+  },
+  "w2v2_config": {
+    "activation_dropout": 0.1,
+    "adapter_kernel_size": 3,
+    "adapter_stride": 2,
+    "add_adapter": false,
+    "apply_spec_augment": true,
+    "architectures": [
+      "Wav2Vec2ConformerForCTC"
+    ],
+    "attention_dropout": 0.1,
+    "bos_token_id": 1,
+    "classifier_proj_size": 256,
+    "codevector_dim": 768,
+    "conformer_conv_dropout": 0.1,
+    "contrastive_logits_temperature": 0.1,
+    "conv_bias": true,
+    "conv_depthwise_kernel_size": 31,
+    "conv_dim": [
+      512,
+      512,
+      512,
+      512,
+      512,
+      512,
+      512
+    ],
+    "conv_kernel": [
+      10,
+      3,
+      3,
+      3,
+      3,
+      2,
+      2
+    ],
+    "conv_stride": [
+      5,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2
+    ],
+    "ctc_loss_reduction": "sum",
+    "ctc_zero_infinity": false,
+    "diversity_loss_weight": 0.1,
+    "do_stable_layer_norm": true,
+    "eos_token_id": 2,
+    "feat_extract_activation": "gelu",
+    "feat_extract_dropout": 0.0,
+    "feat_extract_norm": "layer",
+    "feat_proj_dropout": 0.1,
+    "feat_quantizer_dropout": 0.0,
+    "final_dropout": 0.1,
+    "gradient_checkpointing": false,
+    "hidden_act": "swish",
+    "hidden_dropout": 0.1,
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 1024,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "layerdrop": 0.0,
+    "mask_feature_length": 10,
+    "mask_feature_min_masks": 0,
+    "mask_feature_prob": 0.0,
+    "mask_time_length": 10,
+    "mask_time_min_masks": 2,
+    "mask_time_prob": 0.05,
+    "max_source_positions": 5000,
+    "model_type": "wav2vec2-conformer",
+    "num_adapter_layers": 3,
+    "num_attention_heads": 16,
+    "num_codevector_groups": 2,
+    "num_codevectors_per_group": 320,
+    "num_conv_pos_embedding_groups": 16,
+    "num_conv_pos_embeddings": 128,
+    "num_feat_extract_layers": 7,
+    "num_hidden_layers": 24,
+    "num_negatives": 100,
+    "output_hidden_size": 1024,
+    "pad_token_id": 0,
+    "position_embeddings_type": "rotary",
+    "proj_codevector_dim": 768,
+    "rotary_embedding_base": 10000,
+    "tdnn_dilation": [
+      1,
+      2,
+      3,
+      1,
+      1
+    ],
+    "tdnn_dim": [
+      512,
+      512,
+      512,
+      512,
+      1500
+    ],
+    "tdnn_kernel": [
+      5,
+      3,
+      3,
+      1,
+      1
+    ],
+    "torch_dtype": "float32",
+    "transformers_version": "4.19.0.dev0",
+    "use_weighted_layer_sum": false,
+    "vocab_size": 32,
+    "xvector_output_dim": 512
+  },
+  "use_rvq_target": true,
+  "use_vq_target": false,
+  "use_encodec_target": false,
+  "rvq_ckpt_path": null,
+  "recon_loss_ratio": null,
+  "resume_checkpoint": null,
+  "rvq_n_codebooks": 8,
+  "rvq_multi_layer_num": 1
+}

audio2m_speech_2nodes/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff6ebd6d1900206c9e3d32ef9e08563435444c41318a1112f5a972e1ea40f0b7
+size 1367412264

audio2m_speech_4nodes/README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: [More Information Needed]
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

audio2m_speech_4nodes/config.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "label_rate": 25,
+  "num_codebooks": 8,
+  "codebook_dim": 16,
+  "codebook_size": 2048,
+  "features": [
+    "melspec_2048"
+  ],
+  "hop_length": 240,
+  "n_mels": 128,
+  "conv_dim": 512,
+  "encoder_dim": 1024,
+  "encoder_depth": 12,
+  "mask_hop": 0.4,
+  "mask_prob": 0.6,
+  "is_flash": false,
+  "stat": {
+    "melspec_2048_cnt": 14282760192,
+    "melspec_2048_mean": 6.768444971712967,
+    "melspec_2048_std": 18.417922652295623
+  },
+  "w2v2_config": {
+    "activation_dropout": 0.1,
+    "adapter_kernel_size": 3,
+    "adapter_stride": 2,
+    "add_adapter": false,
+    "apply_spec_augment": true,
+    "architectures": [
+      "Wav2Vec2ConformerForCTC"
+    ],
+    "attention_dropout": 0.1,
+    "bos_token_id": 1,
+    "classifier_proj_size": 256,
+    "codevector_dim": 768,
+    "conformer_conv_dropout": 0.1,
+    "contrastive_logits_temperature": 0.1,
+    "conv_bias": true,
+    "conv_depthwise_kernel_size": 31,
+    "conv_dim": [
+      512,
+      512,
+      512,
+      512,
+      512,
+      512,
+      512
+    ],
+    "conv_kernel": [
+      10,
+      3,
+      3,
+      3,
+      3,
+      2,
+      2
+    ],
+    "conv_stride": [
+      5,
+      2,
+      2,
+      2,
+      2,
+      2,
+      2
+    ],
+    "ctc_loss_reduction": "sum",
+    "ctc_zero_infinity": false,
+    "diversity_loss_weight": 0.1,
+    "do_stable_layer_norm": true,
+    "eos_token_id": 2,
+    "feat_extract_activation": "gelu",
+    "feat_extract_dropout": 0.0,
+    "feat_extract_norm": "layer",
+    "feat_proj_dropout": 0.1,
+    "feat_quantizer_dropout": 0.0,
+    "final_dropout": 0.1,
+    "gradient_checkpointing": false,
+    "hidden_act": "swish",
+    "hidden_dropout": 0.1,
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 1024,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
+    "layerdrop": 0.0,
+    "mask_feature_length": 10,
+    "mask_feature_min_masks": 0,
+    "mask_feature_prob": 0.0,
+    "mask_time_length": 10,
+    "mask_time_min_masks": 2,
+    "mask_time_prob": 0.05,
+    "max_source_positions": 5000,
+    "model_type": "wav2vec2-conformer",
+    "num_adapter_layers": 3,
+    "num_attention_heads": 16,
+    "num_codevector_groups": 2,
+    "num_codevectors_per_group": 320,
+    "num_conv_pos_embedding_groups": 16,
+    "num_conv_pos_embeddings": 128,
+    "num_feat_extract_layers": 7,
+    "num_hidden_layers": 24,
+    "num_negatives": 100,
+    "output_hidden_size": 1024,
+    "pad_token_id": 0,
+    "position_embeddings_type": "rotary",
+    "proj_codevector_dim": 768,
+    "rotary_embedding_base": 10000,
+    "tdnn_dilation": [
+      1,
+      2,
+      3,
+      1,
+      1
+    ],
+    "tdnn_dim": [
+      512,
+      512,
+      512,
+      512,
+      1500
+    ],
+    "tdnn_kernel": [
+      5,
+      3,
+      3,
+      1,
+      1
+    ],
+    "torch_dtype": "float32",
+    "transformers_version": "4.19.0.dev0",
+    "use_weighted_layer_sum": false,
+    "vocab_size": 32,
+    "xvector_output_dim": 512
+  },
+  "use_rvq_target": true,
+  "use_vq_target": false,
+  "use_encodec_target": false,
+  "rvq_ckpt_path": null,
+  "recon_loss_ratio": null,
+  "resume_checkpoint": null,
+  "rvq_n_codebooks": 8,
+  "rvq_multi_layer_num": 1
+}

audio2m_speech_4nodes/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:612a886ea4af2836923ba81a8ab0d21ae037997b25112dca3b518d0e98e5461d
+size 1367412264