#!/usr/bin/env python3
"""
Batch conversion script for multiple CAM++ models

This script demonstrates how to programmatically convert multiple models
without using the Gradio UI or CLI wrapper.

Usage:
    export HF_TOKEN=your_token_here
    python batch_convert.py

Or:
    python batch_convert.py --token YOUR_HF_TOKEN
"""
import os
import sys
import argparse
from typing import List, Tuple
import logging

from app import CAMPPConverter, TARGET_ORGANIZATION

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Define models to convert.
# Each entry: ModelScope input repo, output repo name under
# TARGET_ORGANIZATION, a human-readable description, and which
# quantization variants (2/4/8-bit) to produce.
PRESET_MODELS = [
    {
        "input": "iic/speech_campplus_sv_zh-cn_16k-common",
        "output": "campplus_chinese_16k_common",
        "description": "Chinese (Basic)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
    {
        "input": "iic/speech_campplus_sv_zh_en_16k-common_advanced",
        "output": "campplus_multilingual_16k_advanced",
        "description": "Chinese-English (Advanced)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
]


def parse_args() -> argparse.Namespace:
    """Parse command line arguments for the batch converter."""
    parser = argparse.ArgumentParser(description='Batch convert CAM++ models to MLX')
    parser.add_argument(
        '--token',
        help='HuggingFace API token (or set HF_TOKEN env var)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Test without uploading'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Enable verbose logging'
    )
    return parser.parse_args()


def get_hf_token(args: argparse.Namespace) -> str:
    """Get the HuggingFace token from CLI args or the environment.

    Precedence: --token argument, then HF_TOKEN, then
    HUGGING_FACE_HUB_TOKEN. Exits the process with status 1 if no
    token is found, since a real (non-dry-run) conversion cannot
    upload without one.
    """
    if args.token:
        return args.token

    token = os.getenv('HF_TOKEN') or os.getenv('HUGGING_FACE_HUB_TOKEN')
    if not token:
        logger.error("ERROR: HuggingFace token required")
        logger.error("  Set HF_TOKEN environment variable or use --token argument")
        sys.exit(1)
    return token


def convert_model(
    converter: CAMPPConverter,
    input_repo: str,
    output_name: str,
    token: str,
    q2: bool = False,
    q4: bool = True,
    q8: bool = False
) -> bool:
    """
    Convert a single model.

    Args:
        converter: CAMPPConverter instance
        input_repo: ModelScope repository ID
        output_name: Output model name
        token: HuggingFace API token
        q2: Enable 2-bit quantization
        q4: Enable 4-bit quantization
        q8: Enable 8-bit quantization

    Returns:
        True if successful, False otherwise
    """
    try:
        result = converter.convert_model(
            input_repo=input_repo,
            output_name=output_name,
            hf_token=token,
            quantize_q2=q2,
            quantize_q4=q4,
            quantize_q8=q8
        )
        # NOTE(review): success is detected by scanning the returned status
        # string for a checkmark emoji or a fixed phrase — this is fragile
        # and assumes CAMPPConverter.convert_model's message format never
        # changes. Confirm against app.py before relying on exit codes.
        return "✅" in result or "Conversion Successful" in result
    except Exception as e:
        # Broad catch is deliberate: one failed model must not abort the
        # whole batch; the failure is recorded and summarized at the end.
        logger.error(f"Conversion failed: {e}")
        return False


def _print_summary(results: List[dict]) -> int:
    """Log the batch summary and return the number of failed conversions."""
    logger.info("")
    logger.info("=" * 70)
    logger.info("BATCH CONVERSION SUMMARY")
    logger.info("=" * 70)

    success_count = sum(1 for r in results if r['success'])
    failed_count = len(results) - success_count

    logger.info(f"Total models: {len(results)}")
    logger.info(f"Successful: {success_count}")
    logger.info(f"Failed: {failed_count}")
    logger.info("")

    # List per-model results
    for result in results:
        status = "✅" if result['success'] else "❌"
        logger.info(f"  {status} {result['description']}")
        logger.info(f"     {result['input']} → {TARGET_ORGANIZATION}/{result['output']}")

    logger.info("=" * 70)
    return failed_count


def main():
    """Main batch conversion function.

    Converts every entry in PRESET_MODELS, logs a summary, and exits
    with status 0 if all conversions succeeded, 1 otherwise.
    """
    args = parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # A real run needs a HuggingFace token; a dry run does not, so only
    # enforce the token requirement (which may sys.exit) outside dry-run.
    if args.dry_run:
        logger.info("🔍 DRY RUN MODE - Will not upload to HuggingFace")
        token = "dry_run_placeholder"
    else:
        token = get_hf_token(args)

    logger.info("=" * 70)
    logger.info("CAM++ MLX Converter - Batch Conversion")
    logger.info("=" * 70)
    logger.info(f"Total models: {len(PRESET_MODELS)}")
    logger.info("=" * 70)
    logger.info("")

    # Create converter instance (reuse for all conversions)
    converter = CAMPPConverter()

    # Track results
    results = []

    # Convert each model
    for i, model_config in enumerate(PRESET_MODELS, 1):
        logger.info("")
        logger.info("-" * 70)
        logger.info(f"[{i}/{len(PRESET_MODELS)}] Converting: {model_config['description']}")
        logger.info("-" * 70)
        logger.info(f"  Input:  {model_config['input']}")
        logger.info(f"  Output: {TARGET_ORGANIZATION}/{model_config['output']}")
        logger.info(f"  Quant:  Q2={model_config['q2']}, Q4={model_config['q4']}, Q8={model_config['q8']}")
        logger.info("")

        if args.dry_run:
            # Dry run: validate the configuration and report the planned
            # conversion without invoking the converter (which would
            # attempt a real conversion/upload with a bogus token).
            logger.info("🔍 Dry run - skipping actual conversion")
            success = True
        else:
            success = convert_model(
                converter=converter,
                input_repo=model_config['input'],
                output_name=model_config['output'],
                token=token,
                q2=model_config['q2'],
                q4=model_config['q4'],
                q8=model_config['q8']
            )

        results.append({
            "description": model_config['description'],
            "input": model_config['input'],
            "output": model_config['output'],
            "success": success
        })

        if success:
            logger.info(f"✅ Success: {model_config['description']}")
        else:
            logger.error(f"❌ Failed: {model_config['description']}")

    failed_count = _print_summary(results)

    # Exit with appropriate code
    if failed_count == 0:
        logger.info("✅ All conversions completed successfully!")
        sys.exit(0)
    else:
        logger.warning("⚠️  Some conversions failed. Check logs above for details.")
        sys.exit(1)


if __name__ == "__main__":
    main()