Spaces:
Sleeping
Sleeping
| OUT keys: ['net', 'step'] | |
| module.encoder.embeddings.position_ids: <class 'torch.Tensor'> | |
| shape: (1, 512), dtype: torch.int64 | |
| module.encoder.embeddings.word_embeddings.weight: <class 'torch.Tensor'> | |
| shape: (187, 128), dtype: torch.float32 | |
| module.encoder.embeddings.position_embeddings.weight: <class 'torch.Tensor'> | |
| shape: (512, 128), dtype: torch.float32 | |
| module.encoder.embeddings.token_type_embeddings.weight: <class 'torch.Tensor'> | |
| shape: (2, 128), dtype: torch.float32 | |
| module.encoder.embeddings.LayerNorm.weight: <class 'torch.Tensor'> | |
| shape: (128,), dtype: torch.float32 | |
| module.encoder.embeddings.LayerNorm.bias: <class 'torch.Tensor'> | |
| shape: (128,), dtype: torch.float32 | |
| module.encoder.encoder.embedding_hidden_mapping_in.weight: <class 'torch.Tensor'> | |
| shape: (768, 128), dtype: torch.float32 | |
| module.encoder.encoder.embedding_hidden_mapping_in.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.weight: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.query.weight: <class 'torch.Tensor'> | |
| shape: (768, 768), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.query.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.key.weight: <class 'torch.Tensor'> | |
| shape: (768, 768), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.key.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.value.weight: <class 'torch.Tensor'> | |
| shape: (768, 768), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.value.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.weight: <class 'torch.Tensor'> | |
| shape: (768, 768), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.dense.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.weight: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.attention.LayerNorm.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.ffn.weight: <class 'torch.Tensor'> | |
| shape: (2048, 768), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.ffn.bias: <class 'torch.Tensor'> | |
| shape: (2048,), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.weight: <class 'torch.Tensor'> | |
| shape: (768, 2048), dtype: torch.float32 | |
| module.encoder.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |
| module.encoder.pooler.weight: <class 'torch.Tensor'> | |
| shape: (768, 768), dtype: torch.float32 | |
| module.encoder.pooler.bias: <class 'torch.Tensor'> | |
| shape: (768,), dtype: torch.float32 | |