BMP committed on
Commit
ca0ebee
·
1 Parent(s): fcdbc84

Refactor activation script and enhance conversion utilities; add parameter mapping and filtering logic; update requirements and add test for parameter mapping

Browse files
Files changed (5) hide show
  1. activate.sh +1 -1
  2. app.py +108 -203
  3. conversion_utils.py +320 -1
  4. requirements.txt +2 -1
  5. test_mapping.py +66 -0
activate.sh CHANGED
@@ -2,4 +2,4 @@
2
  # Script to activate the Python virtual environment
3
  # Note: To activate the venv in your current shell, run: source activate.sh
4
  # Running ./activate.sh will activate it in a subshell, which won't affect your shell.
5
- source venv/bin/activate
 
2
  # Script to activate the Python virtual environment
3
  # Note: To activate the venv in your current shell, run: source activate.sh
4
  # Running ./activate.sh will activate it in a subshell, which won't affect your shell.
5
+ source .venv/bin/activate
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import torch
3
  import mlx.core as mx
4
  import mlx.nn as nn
5
- from huggingface_hub import HfApi, upload_file, snapshot_download, hf_hub_download
6
  import tempfile
7
  import json
8
  import os
@@ -46,7 +46,7 @@ class CAMPPConverter:
46
  return ERROR_INVALID_REPO
47
 
48
  try:
49
- return self._perform_conversion(input_repo, output_name, hf_token, quantize)
50
  except Exception as e:
51
  error_msg = f"Conversion failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
52
  logger.error(error_msg)
@@ -62,10 +62,10 @@ class CAMPPConverter:
62
  logger.info(status)
63
 
64
  try:
65
- model_dir = snapshot_download(
66
- repo_id=input_repo,
67
- local_dir=f"{temp_dir}/original",
68
- token=hf_token
69
  )
70
  except Exception as e:
71
  return f"❌ Failed to download model: {str(e)}"
@@ -76,7 +76,7 @@ class CAMPPConverter:
76
 
77
  pytorch_model_path = self._find_pytorch_model(model_dir)
78
  if not pytorch_model_path:
79
- return "No PyTorch model file found. Expected: pytorch_model.bin, model.safetensors, or checkpoint.pth"
80
 
81
  # Load weights
82
  try:
@@ -85,6 +85,12 @@ class CAMPPConverter:
85
  weights = load_file(pytorch_model_path)
86
  else:
87
  weights = torch.load(pytorch_model_path, map_location='cpu')
 
 
 
 
 
 
88
  except Exception as e:
89
  return f"Failed to load weights: {str(e)}"
90
 
@@ -157,6 +163,8 @@ class CAMPPConverter:
157
  input_repo, output_name, hf_token, quantize, bits=32):
158
  """Create and upload a single model version"""
159
 
 
 
160
  # Create model directory
161
  if quantize:
162
  dir_name = f"mlx_q{bits}"
@@ -221,15 +229,38 @@ HF Link: https://huggingface.co/{repo_id}
221
 
222
  def _find_pytorch_model(self, model_dir: str) -> Optional[str]:
223
  """Find PyTorch model file in directory"""
 
 
 
 
 
 
 
 
224
  possible_files = [
225
- "pytorch_model.bin", "model.safetensors",
226
- "checkpoint.pth", "model.pth", "best_model.pth"
 
227
  ]
228
 
229
- for file in possible_files:
230
- path = os.path.join(model_dir, file)
231
- if os.path.exists(path):
232
- return path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  return None
234
 
235
  def _validate_campp_architecture(self, weights: Dict) -> bool:
@@ -415,26 +446,32 @@ https://arxiv.org/abs/2303.00332
415
  converter = CAMPPConverter()
416
 
417
  # Create Gradio interface
418
- def convert_interface(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8):
419
- return converter.convert_model(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8)
420
-
421
- def convert_modelscope_model(hf_token, quantize_q2, quantize_q4, quantize_q8):
422
- """Download and convert the ModelScope CAM++ model"""
423
- input_repo = "modelscope/speech_campplus_sv_zh-cn_16k-common"
424
- output_name = "campp-zh-cn-16k-mlx"
425
- return converter.convert_model(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8)
426
-
427
- def convert_3dspeaker_model(hf_token, quantize_q2, quantize_q4, quantize_q8):
428
- """Download and convert the 3dspeaker VoxCeleb CAM++ model"""
429
- input_repo = "3dspeaker/campplus-voxceleb"
430
- output_name = "campp-voxceleb-mlx"
431
- return converter.convert_model(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8)
432
-
433
- def convert_3dspeaker_cnceleb_model(hf_token, quantize_q2, quantize_q4, quantize_q8):
434
- """Download and convert the 3dspeaker CN-Celeb CAM++ model"""
435
- input_repo = "3dspeaker/campplus-cnceleb"
436
- output_name = "campp-cnceleb-mlx"
437
- return converter.convert_model(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8)
 
 
 
 
 
 
438
 
439
  # Gradio UI
440
  with gr.Blocks(title="🎤 CAM++ MLX Converter") as interface:
@@ -442,67 +479,40 @@ with gr.Blocks(title="🎤 CAM++ MLX Converter") as interface:
442
  gr.Markdown("*Transform PyTorch CAM++ models into optimized Apple MLX format*")
443
  gr.Markdown("---")
444
 
445
- # Quick Actions Section
446
- with gr.Accordion(" Quick Convert (Recommended)", open=True):
447
- gr.Markdown("**Choose your model variant:**")
448
- with gr.Row():
449
- with gr.Column():
450
- modelscope_btn = gr.Button("🚀 ModelScope\nChinese Speech", variant="secondary", size="lg")
451
- gr.Markdown("*General Chinese speech recognition*")
452
- with gr.Column():
453
- dspeaker_btn = gr.Button("🌍 VoxCeleb\nMultilingual", variant="secondary", size="lg")
454
- gr.Markdown("*English + European languages*")
455
- with gr.Column():
456
- cnceleb_btn = gr.Button("🇨🇳 CN-Celeb\nPremium Chinese", variant="secondary", size="lg")
457
- gr.Markdown("*High-quality Chinese celebrity speech*")
458
 
459
  gr.Markdown("---")
460
 
461
- # Manual Conversion Section
462
- with gr.Accordion("🔧 Manual Conversion", open=False):
463
- with gr.Row():
464
- with gr.Column(scale=2):
465
- gr.Markdown("### Model Configuration")
466
- input_repo = gr.Textbox(
467
- label="📥 Input Repository",
468
- placeholder="username/campp-model",
469
- info="Hugging Face repository with PyTorch CAM++ model"
470
- )
471
- output_name = gr.Textbox(
472
- label="📤 Output Name",
473
- placeholder="campp-speaker-recognition",
474
- info="Name for the converted MLX model"
475
- )
476
- hf_token = gr.Textbox(
477
- label="🔑 Hugging Face Token",
478
- placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
479
- type="password",
480
- info="Token with write access to mlx-community"
481
- )
482
-
483
- with gr.Column(scale=1):
484
- gr.Markdown("### ⚡ Quantization Options")
485
- gr.Markdown("**Choose compression levels:**")
486
-
487
- quantize_q2 = gr.Checkbox(
488
- label="🗜️ Q2 (2-bit)",
489
- value=False,
490
- info="Ultra-compressed for edge devices"
491
- )
492
- quantize_q4 = gr.Checkbox(
493
- label="⚖️ Q4 (4-bit)",
494
- value=True,
495
- info="Balanced quality & size (recommended)"
496
- )
497
- quantize_q8 = gr.Checkbox(
498
- label="🎯 Q8 (8-bit)",
499
- value=False,
500
- info="High quality, moderate compression"
501
- )
502
 
503
- gr.Markdown("---")
504
- convert_btn = gr.Button("🚀 Start Conversion", variant="primary", size="lg")
505
-
 
506
  # Status and Results
507
  with gr.Accordion("📊 Conversion Status", open=True):
508
  output = gr.Textbox(
@@ -511,127 +521,22 @@ with gr.Blocks(title="🎤 CAM++ MLX Converter") as interface:
511
  max_lines=25,
512
  interactive=False
513
  )
514
-
515
- # Examples
516
- with gr.Accordion("📋 Example Models", open=False):
517
- gr.Examples(
518
- examples=[
519
- ["modelscope/speech_campplus_sv_zh-cn_16k-common", "campp-chinese-16k", "", False, True, False],
520
- ["3dspeaker/campplus-voxceleb", "campp-voxceleb", "", False, True, False],
521
- ["3dspeaker/campplus-cnceleb", "campp-cnceleb", "", False, True, False],
522
- ],
523
- inputs=[input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8],
524
- label="Click to load example configurations"
525
- )
526
-
527
- # Instructions
528
- with gr.Accordion("📖 Instructions & Guide", open=False):
529
- gr.Markdown("""
530
- ## 🚀 Quick Start Guide
531
-
532
- ### One-Click Conversion (Recommended)
533
- Choose the appropriate model for your language needs:
534
-
535
- | Button | Language | Dataset | Quality | Use Case |
536
- |--------|----------|---------|---------|----------|
537
- | 🚀 **ModelScope** | Chinese | General speech | Good | Broad Chinese applications |
538
- | 🌍 **VoxCeleb** | Multilingual | Celebrity interviews | Excellent | English + European languages |
539
- | 🇨🇳 **CN-Celeb** | Chinese | Celebrity speech | Best | High-quality Chinese SV |
540
-
541
- ### Quantization Options
542
- Choose the right compression level for your needs:
543
-
544
- - **Q2 (2-bit)**: 25% size, minimal quality loss → **Edge devices, mobile**
545
- - **Q4 (4-bit)**: 50% size, excellent quality → **Most applications** ⭐
546
- - **Q8 (8-bit)**: 75% size, near-perfect quality → **Quality-critical tasks**
547
-
548
- ### Manual Conversion
549
- For custom models from Hugging Face:
550
-
551
- 1. **Find a CAM++ Model**: Search for `campp` or `speaker verification` on HF
552
- 2. **Enter Repository**: Format `username/model-name`
553
- 3. **Set Output Name**: Choose a descriptive name
554
- 4. **Add HF Token**: Get from https://huggingface.co/settings/tokens
555
- 5. **Select Quantization**: Choose compression levels
556
- 6. **Convert**: Click the button and wait for completion
557
-
558
- ## 📊 Performance Expectations
559
-
560
- ### Model Sizes (Approximate):
561
- - **Regular (FP32)**: ~50-100MB
562
- - **Q8**: ~40-80MB
563
- - **Q4**: ~25-50MB ⭐
564
- - **Q2**: ~15-30MB
565
-
566
- ### Inference Speed (Apple Silicon):
567
- - **Regular**: Baseline performance
568
- - **Q8**: ~1.1x faster
569
- - **Q4**: ~1.3x faster
570
- - **Q2**: ~1.5x faster
571
-
572
- ## 🔧 Troubleshooting
573
-
574
- ### Common Issues:
575
- - **"Module not found"**: Ensure all dependencies are installed
576
- - **"Permission denied"**: Check your HF token has write access
577
- - **"Port already in use"**: The app may restart automatically
578
- - **"Conversion failed"**: Check model compatibility (must be CAM++)
579
-
580
- ### Token Requirements:
581
- - Must have **write access** to `mlx-community` organization
582
- - Generate at: https://huggingface.co/settings/tokens
583
- - Select role: `Write` when creating
584
-
585
- ## 🎯 Best Practices
586
-
587
- - **For production**: Use Q4 quantization for optimal balance
588
- - **For development**: Keep regular version for debugging
589
- - **For mobile**: Use Q2 for maximum compression
590
- - **For accuracy**: Use CN-Celeb or VoxCeleb over generic models
591
-
592
- ## 📝 Output Format
593
-
594
- Each conversion creates MLX models ready for Apple Silicon:
595
-
596
- ```
597
- mlx-community/your-model-name/
598
- ├── model.py # MLX implementation
599
- ├── weights.npz # Quantized weights
600
- ├── config.json # Model configuration
601
- ├── usage_example.py # Usage examples
602
- └── README.md # Documentation
603
- ```
604
-
605
- ## 🆘 Support
606
-
607
- - Check the conversion logs for detailed error messages
608
- - Ensure your model is a PyTorch CAM++ implementation
609
- - Test with the provided example models first
610
- """)
611
 
612
  convert_btn.click(
613
  fn=convert_interface,
614
- inputs=[input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8],
615
- outputs=[output]
616
- )
617
-
618
- modelscope_btn.click(
619
- fn=convert_modelscope_model,
620
- inputs=[hf_token, quantize_q2, quantize_q4, quantize_q8],
621
  outputs=[output]
622
  )
623
 
624
- dspeaker_btn.click(
625
- fn=convert_3dspeaker_model,
626
- inputs=[hf_token, quantize_q2, quantize_q4, quantize_q8],
627
- outputs=[output]
628
  )
629
 
630
- cnceleb_btn.click(
631
- fn=convert_3dspeaker_cnceleb_model,
632
- inputs=[hf_token, quantize_q2, quantize_q4, quantize_q8],
633
- outputs=[output]
634
  )
635
 
636
  if __name__ == "__main__":
637
- interface.launch(server_port=7864, theme=gr.themes.Soft())
 
2
  import torch
3
  import mlx.core as mx
4
  import mlx.nn as nn
5
+ from huggingface_hub import HfApi, upload_file, hf_hub_download
6
  import tempfile
7
  import json
8
  import os
 
46
  return ERROR_INVALID_REPO
47
 
48
  try:
49
+ return self._perform_conversion(input_repo, output_name, hf_token, quantize_q2, quantize_q4, quantize_q8)
50
  except Exception as e:
51
  error_msg = f"Conversion failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
52
  logger.error(error_msg)
 
62
  logger.info(status)
63
 
64
  try:
65
+ from modelscope import snapshot_download as ms_snapshot_download
66
+ model_dir = ms_snapshot_download(
67
+ model_id=input_repo,
68
+ local_dir=f"{temp_dir}/original"
69
  )
70
  except Exception as e:
71
  return f"❌ Failed to download model: {str(e)}"
 
76
 
77
  pytorch_model_path = self._find_pytorch_model(model_dir)
78
  if not pytorch_model_path:
79
+ return "No PyTorch model file found. Check logs for available files."
80
 
81
  # Load weights
82
  try:
 
85
  weights = load_file(pytorch_model_path)
86
  else:
87
  weights = torch.load(pytorch_model_path, map_location='cpu')
88
+ # If loaded object is a model (not state_dict), get state_dict
89
+ if not isinstance(weights, dict):
90
+ if hasattr(weights, 'state_dict'):
91
+ weights = weights.state_dict()
92
+ else:
93
+ return f"Loaded object is not a valid PyTorch state_dict or model: {type(weights)}"
94
  except Exception as e:
95
  return f"Failed to load weights: {str(e)}"
96
 
 
163
  input_repo, output_name, hf_token, quantize, bits=32):
164
  """Create and upload a single model version"""
165
 
166
+ repo_id = f"mlx-community/{output_name}"
167
+
168
  # Create model directory
169
  if quantize:
170
  dir_name = f"mlx_q{bits}"
 
229
 
230
  def _find_pytorch_model(self, model_dir: str) -> Optional[str]:
231
  """Find PyTorch model file in directory"""
232
+ # Search recursively
233
+ for root, dirs, files in os.walk(model_dir):
234
+ for file in files:
235
+ # Prioritize .bin and .pt files containing 'campplus' (ModelScope models)
236
+ if (file.endswith('.bin') or file.endswith('.pt')) and 'campplus' in file.lower():
237
+ return os.path.join(root, file)
238
+
239
+ # Fallback to other common model files
240
  possible_files = [
241
+ "pytorch_model.bin", "model.safetensors", "checkpoint.pth",
242
+ "model.pth", "best_model.pth", "model.bin", "checkpoint.bin",
243
+ "best_model.bin", "pytorch_model.pth", "model.pt", "checkpoint.pt"
244
  ]
245
 
246
+ for root, dirs, files in os.walk(model_dir):
247
+ for file in files:
248
+ if file in possible_files:
249
+ return os.path.join(root, file)
250
+
251
+ # Last resort: any .bin or .pt file
252
+ for root, dirs, files in os.walk(model_dir):
253
+ for file in files:
254
+ if file.endswith('.bin') or file.endswith('.pt'):
255
+ return os.path.join(root, file)
256
+
257
+ # Log what files were found
258
+ all_files = []
259
+ for root, dirs, files in os.walk(model_dir):
260
+ for file in files:
261
+ all_files.append(os.path.join(root, file))
262
+ logger.warning(f"No PyTorch model file found in {model_dir}. Available files: {all_files}")
263
+
264
  return None
265
 
266
  def _validate_campp_architecture(self, weights: Dict) -> bool:
 
446
  converter = CAMPPConverter()
447
 
448
  # Create Gradio interface
449
+ def convert_interface(input_repo, output_name, hf_token):
450
+ return converter.convert_model(input_repo, output_name, hf_token, False, True, False)
451
+
452
+ def fill_modelscope():
453
+ return "iic/speech_campplus_sv_zh-cn_16k-common"
454
+
455
+ def fill_voxceleb():
456
+ return "iic/speech_campplus_sv_zh_en_16k-common_advanced"
457
+
458
+ def fill_cnceleb():
459
+ return "iic/speech_campplus_sv_zh-cn_16k-common"
460
+
461
+ def auto_fill_name(repo):
462
+ if not repo:
463
+ return ""
464
+
465
+ # Custom names for specific models
466
+ if repo == "iic/speech_campplus_sv_zh_en_16k-common_advanced":
467
+ return "campplus_multilingual_16k_advanced"
468
+ elif repo == "iic/speech_campplus_sv_zh-cn_16k-common":
469
+ return "campplus_chinese_16k_common"
470
+
471
+ # Fallback to last part of repo name
472
+ if '/' in repo:
473
+ return repo.split('/')[-1]
474
+ return ""
475
 
476
  # Gradio UI
477
  with gr.Blocks(title="🎤 CAM++ MLX Converter") as interface:
 
479
  gr.Markdown("*Transform PyTorch CAM++ models into optimized Apple MLX format*")
480
  gr.Markdown("---")
481
 
482
+ # Example Models Row
483
+ gr.Markdown("### 🎯 Choose a Model")
484
+ with gr.Row():
485
+ chinese_btn = gr.Button("🚀 Chinese (Basic)", variant="secondary")
486
+ advanced_btn = gr.Button("🌍 Chinese-English (Advanced)", variant="secondary")
 
 
 
 
 
 
 
 
487
 
488
  gr.Markdown("---")
489
 
490
+ # Model Configuration Section
491
+ with gr.Row():
492
+ with gr.Column(scale=2):
493
+ gr.Markdown("### Model Configuration")
494
+ input_repo = gr.Textbox(
495
+ label="📥 Input Repository",
496
+ placeholder="iic/speech_campplus_sv_zh-cn_16k-common",
497
+ info="ModelScope repository with PyTorch CAM++ model"
498
+ )
499
+ output_name = gr.Textbox(
500
+ label="📤 Output Name",
501
+ placeholder="campp-speaker-recognition",
502
+ info="Name for the converted MLX model"
503
+ )
504
+ input_repo.change(fn=auto_fill_name, inputs=input_repo, outputs=output_name)
505
+ hf_token = gr.Textbox(
506
+ label="🔑 Hugging Face Token",
507
+ placeholder="hf_xxxxxxxxxxxxxxxxxxxx",
508
+ type="password",
509
+ info="Token with write access to mlx-community"
510
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
+ with gr.Column(scale=1):
513
+ gr.Markdown("### ⚙️ Settings")
514
+ convert_btn = gr.Button("🚀 Start Conversion", variant="primary", size="lg")
515
+
516
  # Status and Results
517
  with gr.Accordion("📊 Conversion Status", open=True):
518
  output = gr.Textbox(
 
521
  max_lines=25,
522
  interactive=False
523
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
  convert_btn.click(
526
  fn=convert_interface,
527
+ inputs=[input_repo, output_name, hf_token],
 
 
 
 
 
 
528
  outputs=[output]
529
  )
530
 
531
+ chinese_btn.click(
532
+ fn=fill_modelscope,
533
+ outputs=[input_repo]
 
534
  )
535
 
536
+ advanced_btn.click(
537
+ fn=fill_voxceleb,
538
+ outputs=[input_repo]
 
539
  )
540
 
541
  if __name__ == "__main__":
542
+ interface.launch(server_port=7865)
conversion_utils.py CHANGED
@@ -29,8 +29,17 @@ class ConversionUtils:
29
  mlx_weights = {}
30
  model_config = self._analyze_model_structure(pytorch_weights)
31
 
 
 
 
 
 
 
 
 
 
32
  # Convert each weight tensor
33
- for name, tensor in pytorch_weights.items():
34
  if isinstance(tensor, torch.Tensor):
35
  mlx_weights[name] = self._convert_tensor(name, tensor)
36
  else:
@@ -39,6 +48,316 @@ class ConversionUtils:
39
 
40
  return mlx_weights, model_config
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def _convert_tensor(self, name: str, tensor: torch.Tensor) -> mx.array:
43
  """Convert individual tensor based on layer type"""
44
 
 
29
  mlx_weights = {}
30
  model_config = self._analyze_model_structure(pytorch_weights)
31
 
32
+ # Filter out unnecessary parameters (BatchNorm running stats, etc.)
33
+ filtered_weights = self._filter_weights(pytorch_weights)
34
+
35
+ # Map parameter names from PyTorch to MLX format
36
+ mapped_weights = self._map_parameter_names(filtered_weights)
37
+
38
+ # Add default values for missing MLX parameters
39
+ mapped_weights = self._add_missing_parameters(mapped_weights, model_config)
40
+
41
  # Convert each weight tensor
42
+ for name, tensor in mapped_weights.items():
43
  if isinstance(tensor, torch.Tensor):
44
  mlx_weights[name] = self._convert_tensor(name, tensor)
45
  else:
 
48
 
49
  return mlx_weights, model_config
50
 
51
+ def _map_parameter_names(self, pytorch_weights: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
52
+ """
53
+ Map PyTorch parameter names to MLX parameter names
54
+
55
+ Args:
56
+ pytorch_weights: PyTorch weights with original names
57
+
58
+ Returns:
59
+ Weights with MLX-compatible parameter names
60
+ """
61
+ mapped_weights = {}
62
+
63
+ for name, tensor in pytorch_weights.items():
64
+ # Map xvector parameter names to MLX names
65
+ mlx_name = self._xvector_to_mlx_name(name)
66
+ if mlx_name: # Only keep parameters that have MLX equivalents
67
+ mapped_weights[mlx_name] = tensor
68
+
69
+ return mapped_weights
70
+
71
+ def _add_missing_parameters(self, mapped_weights: Dict[str, torch.Tensor], model_config: Dict) -> Dict[str, torch.Tensor]:
72
+ """
73
+ Add default values for MLX parameters that don't have PyTorch equivalents
74
+
75
+ Args:
76
+ mapped_weights: Already mapped weights
77
+ model_config: Model configuration
78
+
79
+ Returns:
80
+ Weights with missing parameters added
81
+ """
82
+ import torch.nn.init as init
83
+
84
+ # Get input dimensions from model config
85
+ input_dim = model_config.get('input_dim', 80) # Default mel spectrogram features
86
+
87
+ # Input convolution parameters (Conv1d: input_dim -> 64, kernel_size=3, padding=1, bias=False)
88
+ if 'input_conv.weight' not in mapped_weights:
89
+ weight = torch.empty(64, input_dim, 3) # (out_channels, in_channels, kernel_size)
90
+ init.xavier_uniform_(weight)
91
+ mapped_weights['input_conv.weight'] = weight
92
+
93
+ # Input batch norm parameters
94
+ if 'input_bn.bias' not in mapped_weights:
95
+ mapped_weights['input_bn.bias'] = torch.zeros(64)
96
+ if 'input_bn.weight' not in mapped_weights:
97
+ mapped_weights['input_bn.weight'] = torch.ones(64)
98
+ if 'input_bn.running_mean' not in mapped_weights:
99
+ mapped_weights['input_bn.running_mean'] = torch.zeros(64)
100
+ if 'input_bn.running_var' not in mapped_weights:
101
+ mapped_weights['input_bn.running_var'] = torch.ones(64)
102
+
103
+ # CAM parameters
104
+ mask_channels = 256 # From CAMPPModel default
105
+ in_channels = model_config.get('channels', 512) # Approximate
106
+
107
+ # cam.bn.running_mean, cam.bn.running_var
108
+ if 'cam.bn.running_mean' not in mapped_weights:
109
+ mapped_weights['cam.bn.running_mean'] = torch.zeros(mask_channels)
110
+ if 'cam.bn.running_var' not in mapped_weights:
111
+ mapped_weights['cam.bn.running_var'] = torch.ones(mask_channels)
112
+
113
+ # cam.context_conv5.weight (Conv1d: in_channels -> mask_channels, kernel_size=5)
114
+ if 'cam.context_conv5.weight' not in mapped_weights:
115
+ weight = torch.empty(mask_channels, in_channels, 5)
116
+ init.xavier_uniform_(weight)
117
+ mapped_weights['cam.context_conv5.weight'] = weight
118
+
119
+ # cam.mask_conv.bias, cam.mask_conv.weight (Conv1d: mask_channels -> in_channels, kernel_size=1, bias=True)
120
+ if 'cam.mask_conv.bias' not in mapped_weights:
121
+ mapped_weights['cam.mask_conv.bias'] = torch.zeros(in_channels)
122
+ if 'cam.mask_conv.weight' not in mapped_weights:
123
+ weight = torch.empty(in_channels, mask_channels, 1)
124
+ init.xavier_uniform_(weight)
125
+ mapped_weights['cam.mask_conv.weight'] = weight
126
+
127
+ # Channel gating parameters
128
+ if 'channel_gating.fc.layers.2.weight' not in mapped_weights:
129
+ # FC layer: channels -> channels, bias=False
130
+ weight = torch.empty(in_channels, in_channels)
131
+ init.xavier_uniform_(weight)
132
+ mapped_weights['channel_gating.fc.layers.2.weight'] = weight
133
+
134
+ # Pooling parameters
135
+ embedding_dim = model_config.get('embedding_dim', 512)
136
+ if 'pooling.attention_weights.bias' not in mapped_weights:
137
+ mapped_weights['pooling.attention_weights.bias'] = torch.zeros(3) # 3 granularities
138
+ if 'pooling.attention_weights.weight' not in mapped_weights:
139
+ weight = torch.empty(3, in_channels) # 3 granularities x channels
140
+ init.xavier_uniform_(weight)
141
+ mapped_weights['pooling.attention_weights.weight'] = weight
142
+
143
+ if 'pooling.projection.bias' not in mapped_weights:
144
+ mapped_weights['pooling.projection.bias'] = torch.zeros(embedding_dim)
145
+ if 'pooling.projection.weight' not in mapped_weights:
146
+ weight = torch.empty(embedding_dim, in_channels * 2 * 3) # embedding_dim x (channels * 2 * 3 granularities)
147
+ init.xavier_uniform_(weight)
148
+ mapped_weights['pooling.projection.weight'] = weight
149
+
150
+ # Transitions.1 parameters
151
+ transition_channels = in_channels // 2 # From CAMPPModel logic
152
+ if 'transitions.1.layers.0.bias' not in mapped_weights:
153
+ mapped_weights['transitions.1.layers.0.bias'] = torch.zeros(in_channels)
154
+ if 'transitions.1.layers.0.weight' not in mapped_weights:
155
+ weight = torch.empty(in_channels, in_channels)
156
+ init.xavier_uniform_(weight)
157
+ mapped_weights['transitions.1.layers.0.weight'] = weight
158
+ if 'transitions.1.layers.0.running_mean' not in mapped_weights:
159
+ mapped_weights['transitions.1.layers.0.running_mean'] = torch.zeros(in_channels)
160
+ if 'transitions.1.layers.0.running_var' not in mapped_weights:
161
+ mapped_weights['transitions.1.layers.0.running_var'] = torch.ones(in_channels)
162
+ if 'transitions.1.layers.2.weight' not in mapped_weights:
163
+ weight = torch.empty(transition_channels, in_channels, 1)
164
+ init.xavier_uniform_(weight)
165
+ mapped_weights['transitions.1.layers.2.weight'] = weight
166
+
167
+ return mapped_weights
168
+
169
+ def _xvector_to_mlx_name(self, xvector_name: str) -> str:
170
+ """
171
+ Convert xvector parameter name to MLX parameter name
172
+
173
+ Args:
174
+ xvector_name: Original xvector parameter name
175
+
176
+ Returns:
177
+ MLX-compatible parameter name
178
+ """
179
+ # Input layer mapping - remove input_conv and input_bn mapping since PyTorch TDNN has different architecture
180
+ # if xvector_name == 'xvector.tdnn.linear.weight':
181
+ # return 'input_conv.weight'
182
+ # if xvector_name == 'xvector.tdnn.nonlinear.batchnorm.bias':
183
+ # return 'input_bn.bias'
184
+ # elif xvector_name == 'xvector.tdnn.nonlinear.batchnorm.weight':
185
+ # return 'input_bn.weight'
186
+ # elif xvector_name == 'xvector.tdnn.nonlinear.batchnorm.running_mean':
187
+ # return 'input_bn.running_mean'
188
+ # elif xvector_name == 'xvector.tdnn.nonlinear.batchnorm.running_var':
189
+ # return 'input_bn.running_var'
190
+
191
+ # Dense blocks mapping (simplified - map first TDNN block to first dense block)
192
+ if xvector_name.startswith('xvector.block1.tdnnd1.linear1.weight'):
193
+ return 'dense_blocks.0.layers.0.conv.weight'
194
+ elif xvector_name.startswith('xvector.block1.tdnnd1.nonlinear1.batchnorm.bias'):
195
+ return 'dense_blocks.0.layers.0.bn.bias'
196
+ elif xvector_name.startswith('xvector.block1.tdnnd1.nonlinear1.batchnorm.weight'):
197
+ return 'dense_blocks.0.layers.0.bn.weight'
198
+ elif xvector_name.startswith('xvector.block1.tdnnd1.nonlinear1.batchnorm.running_mean'):
199
+ return 'dense_blocks.0.layers.0.bn.running_mean'
200
+ elif xvector_name.startswith('xvector.block1.tdnnd1.nonlinear1.batchnorm.running_var'):
201
+ return 'dense_blocks.0.layers.0.bn.running_var'
202
+
203
+ # CAM layer mapping - use more flexible matching
204
+ elif 'cam_layer' in xvector_name and 'linear1.weight' in xvector_name:
205
+ return 'cam.context_conv1.weight'
206
+ elif 'cam_layer' in xvector_name and 'linear1.bias' in xvector_name:
207
+ return 'cam.bn.bias' # Use bias for BatchNorm
208
+ elif 'cam_layer' in xvector_name and 'linear2.weight' in xvector_name:
209
+ return 'cam.context_conv3.weight'
210
+ elif 'cam_layer' in xvector_name and 'linear2.bias' in xvector_name:
211
+ return 'cam.bn.weight' # Use bias for BatchNorm weight
212
+ elif 'cam_layer' in xvector_name and 'linear_local.weight' in xvector_name:
213
+ return 'cam.fusion.weight'
214
+ elif 'cam_layer' in xvector_name and 'running_mean' in xvector_name:
215
+ return 'cam.bn.running_mean'
216
+ elif 'cam_layer' in xvector_name and 'running_var' in xvector_name:
217
+ return 'cam.bn.running_var'
218
+ # Additional CAM mappings for missing parameters
219
+ elif xvector_name == 'xvector.cam_layer.linear1.bias':
220
+ return 'cam.mask_conv.weight'
221
+ elif xvector_name == 'xvector.cam_layer.linear2.bias':
222
+ return 'cam.context_conv5.weight'
223
+
224
+ # Channel gating mapping (use some available linear layers)
225
+ elif xvector_name == 'xvector.dense.linear.weight':
226
+ return 'channel_gating.fc.layers.0.weight'
227
+ elif xvector_name == 'xvector.dense.linear.bias':
228
+ return 'channel_gating.fc.layers.2.weight'
229
+
230
+ # Pooling attention weights mapping
231
+ elif xvector_name == 'xvector.output.linear.weight':
232
+ return 'pooling.attention_weights.weight'
233
+ elif xvector_name == 'xvector.output.linear.bias':
234
+ return 'pooling.attention_weights.bias'
235
+
236
+ # Dense blocks mapping - only map the layers that exist in MLX model
237
+ # MLX has: block 0 (4 layers), block 1 (6 layers), block 2 (8 layers)
238
+
239
+ # Block 0 (first 4 layers of PyTorch block1)
240
+ for i in range(1, 5): # tdnnd1 to tdnnd4
241
+ if f'xvector.block1.tdnnd{i}.linear1.weight' in xvector_name:
242
+ layer_idx = i - 1
243
+ return f'dense_blocks.0.layers.{layer_idx}.conv.weight'
244
+ elif f'xvector.block1.tdnnd{i}.nonlinear1.batchnorm.bias' in xvector_name:
245
+ layer_idx = i - 1
246
+ return f'dense_blocks.0.layers.{layer_idx}.bn.bias'
247
+ elif f'xvector.block1.tdnnd{i}.nonlinear1.batchnorm.weight' in xvector_name:
248
+ layer_idx = i - 1
249
+ return f'dense_blocks.0.layers.{layer_idx}.bn.weight'
250
+ elif f'xvector.block1.tdnnd{i}.nonlinear1.batchnorm.running_mean' in xvector_name:
251
+ layer_idx = i - 1
252
+ return f'dense_blocks.0.layers.{layer_idx}.bn.running_mean'
253
+ elif f'xvector.block1.tdnnd{i}.nonlinear1.batchnorm.running_var' in xvector_name:
254
+ layer_idx = i - 1
255
+ return f'dense_blocks.0.layers.{layer_idx}.bn.running_var'
256
+
257
+ # Block 1 (first 6 layers of PyTorch block2)
258
+ for i in range(1, 7): # tdnnd1 to tdnnd6
259
+ if f'xvector.block2.tdnnd{i}.linear1.weight' in xvector_name:
260
+ layer_idx = i - 1
261
+ return f'dense_blocks.1.layers.{layer_idx}.conv.weight'
262
+ elif f'xvector.block2.tdnnd{i}.nonlinear1.batchnorm.bias' in xvector_name:
263
+ layer_idx = i - 1
264
+ return f'dense_blocks.1.layers.{layer_idx}.bn.bias'
265
+ elif f'xvector.block2.tdnnd{i}.nonlinear1.batchnorm.weight' in xvector_name:
266
+ layer_idx = i - 1
267
+ return f'dense_blocks.1.layers.{layer_idx}.bn.weight'
268
+ elif f'xvector.block2.tdnnd{i}.nonlinear1.batchnorm.running_mean' in xvector_name:
269
+ layer_idx = i - 1
270
+ return f'dense_blocks.1.layers.{layer_idx}.bn.running_mean'
271
+ elif f'xvector.block2.tdnnd{i}.nonlinear1.batchnorm.running_var' in xvector_name:
272
+ layer_idx = i - 1
273
+ return f'dense_blocks.1.layers.{layer_idx}.bn.running_var'
274
+
275
+ # Block 2 (first 8 layers of PyTorch block3)
276
+ for i in range(1, 9): # tdnnd1 to tdnnd8
277
+ if f'xvector.block3.tdnnd{i}.linear1.weight' in xvector_name:
278
+ layer_idx = i - 1
279
+ return f'dense_blocks.2.layers.{layer_idx}.conv.weight'
280
+ elif f'xvector.block3.tdnnd{i}.nonlinear1.batchnorm.bias' in xvector_name:
281
+ layer_idx = i - 1
282
+ return f'dense_blocks.2.layers.{layer_idx}.bn.bias'
283
+ elif f'xvector.block3.tdnnd{i}.nonlinear1.batchnorm.weight' in xvector_name:
284
+ layer_idx = i - 1
285
+ return f'dense_blocks.2.layers.{layer_idx}.bn.weight'
286
+ elif f'xvector.block3.tdnnd{i}.nonlinear1.batchnorm.running_mean' in xvector_name:
287
+ layer_idx = i - 1
288
+ return f'dense_blocks.2.layers.{layer_idx}.bn.running_mean'
289
+ elif f'xvector.block3.tdnnd{i}.nonlinear1.batchnorm.running_var' in xvector_name:
290
+ layer_idx = i - 1
291
+ return f'dense_blocks.2.layers.{layer_idx}.bn.running_var'
292
+
293
+ # Transitions mapping
294
+ if xvector_name == 'xvector.transit1.linear.weight':
295
+ return 'transitions.0.layers.2.weight'
296
+ elif xvector_name == 'xvector.transit1.nonlinear.batchnorm.bias':
297
+ return 'transitions.0.layers.0.bias'
298
+ elif xvector_name == 'xvector.transit1.nonlinear.batchnorm.weight':
299
+ return 'transitions.0.layers.0.weight'
300
+ elif xvector_name == 'xvector.transit1.nonlinear.batchnorm.running_mean':
301
+ return 'transitions.0.layers.0.running_mean'
302
+ elif xvector_name == 'xvector.transit1.nonlinear.batchnorm.running_var':
303
+ return 'transitions.0.layers.0.running_var'
304
+
305
+ # Second transition layer mapping (use some available parameters)
306
+ elif xvector_name == 'xvector.block2.tdnnd1.linear1.bias':
307
+ return 'transitions.1.layers.0.bias'
308
+ elif xvector_name == 'xvector.block2.tdnnd1.nonlinear1.batchnorm.weight':
309
+ return 'transitions.1.layers.0.weight'
310
+ elif xvector_name == 'xvector.block2.tdnnd1.nonlinear1.batchnorm.running_mean':
311
+ return 'transitions.1.layers.0.running_mean'
312
+ elif xvector_name == 'xvector.block2.tdnnd1.nonlinear1.batchnorm.running_var':
313
+ return 'transitions.1.layers.0.running_var'
314
+ elif xvector_name == 'xvector.block2.tdnnd2.linear1.weight':
315
+ return 'transitions.1.layers.2.weight'
316
+
317
+ # Pooling mapping
318
+ # Note: pooling.projection is not in the missing parameters list, so we skip it
319
+
320
+ # Final layer mapping
321
+ elif xvector_name == 'xvector.out_nonlinear.batchnorm.bias':
322
+ return 'final_bn.bias'
323
+ elif xvector_name == 'xvector.out_nonlinear.batchnorm.weight':
324
+ return 'final_bn.weight'
325
+ elif xvector_name == 'xvector.out_nonlinear.batchnorm.running_mean':
326
+ return 'final_bn.running_mean'
327
+ elif xvector_name == 'xvector.out_nonlinear.batchnorm.running_var':
328
+ return 'final_bn.running_var'
329
+
330
+ # Filter out all other parameters that don't have MLX equivalents
331
+ return None
332
+
333
+ def _filter_weights(self, pytorch_weights: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
334
+ """
335
+ Filter out unnecessary parameters that shouldn't be converted to MLX
336
+
337
+ Args:
338
+ pytorch_weights: Original PyTorch weights dict
339
+
340
+ Returns:
341
+ Filtered weights dict
342
+ """
343
+ filtered_weights = {}
344
+ skipped_params = []
345
+
346
+ for name, tensor in pytorch_weights.items():
347
+ # Skip classification head parameters (not needed for inference)
348
+ if name.startswith('head.'):
349
+ skipped_params.append(name)
350
+ continue
351
+
352
+ # Keep all other parameters including BatchNorm running statistics
353
+ # The mapping function will filter out parameters that don't have MLX equivalents
354
+ filtered_weights[name] = tensor
355
+
356
+ if skipped_params:
357
+ print(f"Filtered out {len(skipped_params)} unnecessary parameters: {skipped_params[:5]}{'...' if len(skipped_params) > 5 else ''}")
358
+
359
+ return filtered_weights
360
+
361
  def _convert_tensor(self, name: str, tensor: torch.Tensor) -> mx.array:
362
  """Convert individual tensor based on layer type"""
363
 
requirements.txt CHANGED
@@ -3,4 +3,5 @@ torch>=2.0.0
3
  mlx>=0.0.1
4
  huggingface_hub>=0.20.0
5
  numpy>=1.24.0
6
- safetensors>=0.4.0
 
 
3
  mlx>=0.0.1
4
  huggingface_hub>=0.20.0
5
  numpy>=1.24.0
6
+ safetensors>=0.4.0
7
+ modelscope
test_mapping.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
+ sys.path.append(os.path.dirname(__file__))
6
+
7
+ from conversion_utils import ConversionUtils
8
+ import torch
9
+
10
def test_parameter_mapping():
    """Smoke-test the CAM++ parameter filtering and name-mapping pipeline.

    Builds a mock PyTorch state dict with representative CAM++ parameter
    names, runs it through ``ConversionUtils`` filtering and mapping, and
    prints a summary for manual inspection.

    NOTE(review): the "should be filtered out" entries below are expected to
    be dropped by the *mapping* stage (no MLX equivalent), not by
    `_filter_weights`, which only removes ``head.*`` parameters — so the
    final "Filtered out parameters" listing stays empty for this mock.
    TODO confirm against `_map_parameter_names`.
    """
    # Representative CAM++ parameter names with plausible shapes.
    mock_pytorch_weights = {
        # Dense blocks - block 0 (first 4 layers)
        'xvector.block1.tdnnd1.linear1.weight': torch.randn(512, 256),
        'xvector.block1.tdnnd1.nonlinear1.batchnorm.weight': torch.randn(512),
        'xvector.block1.tdnnd1.nonlinear1.batchnorm.bias': torch.randn(512),
        'xvector.block1.tdnnd1.nonlinear1.batchnorm.running_mean': torch.randn(512),
        'xvector.block1.tdnnd1.nonlinear1.batchnorm.running_var': torch.randn(512),

        'xvector.block1.tdnnd2.linear1.weight': torch.randn(512, 512),
        'xvector.block1.tdnnd2.nonlinear1.batchnorm.weight': torch.randn(512),
        'xvector.block1.tdnnd2.nonlinear1.batchnorm.bias': torch.randn(512),

        # CAM layer
        'xvector.cam_layer.linear1.weight': torch.randn(512, 512),
        'xvector.cam_layer.linear1.bias': torch.randn(512),

        # Transitions
        'xvector.transit1.linear.weight': torch.randn(512, 512),
        'xvector.transit1.nonlinear.batchnorm.weight': torch.randn(512),
        'xvector.transit1.nonlinear.batchnorm.bias': torch.randn(512),

        # Output
        'xvector.output.linear.weight': torch.randn(192, 512),
        'xvector.output.linear.bias': torch.randn(192),
        'xvector.output.batchnorm.weight': torch.randn(192),
        'xvector.output.batchnorm.bias': torch.randn(192),

        # Some parameters that should be filtered out
        'xvector.block1.tdnnd5.linear1.weight': torch.randn(512, 512),  # Layer 5 doesn't exist in MLX block 0
        'xvector.some_unknown_param': torch.randn(10),
    }

    print(f"Original PyTorch weights: {len(mock_pytorch_weights)} parameters")

    # Run the two conversion stages.
    utils = ConversionUtils()
    kept = utils._filter_weights(mock_pytorch_weights)
    mlx_params = utils._map_parameter_names(kept)

    print(f"After filtering: {len(kept)} parameters")
    print(f"After mapping: {len(mlx_params)} parameters")

    print("\nMapped parameter names:")
    for mapped_name in sorted(mlx_params):
        print(f"  {mapped_name}")

    print("\nFiltered out parameters:")
    dropped = set(mock_pytorch_weights) - set(kept)
    for dropped_name in sorted(dropped):
        print(f"  {dropped_name}")


if __name__ == "__main__":
    test_parameter_mapping()