Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Batch conversion script for multiple CAM++ models | |
| This script demonstrates how to programmatically convert multiple models | |
| without using the Gradio UI or CLI wrapper. | |
| Usage: | |
| export HF_TOKEN=your_token_here | |
| python batch_convert.py | |
| Or: | |
| python batch_convert.py --token YOUR_HF_TOKEN | |
| """ | |
| import os | |
| import sys | |
| import argparse | |
| from typing import List, Tuple | |
| import logging | |
| from app import CAMPPConverter, TARGET_ORGANIZATION | |
# Set up logging
# Module-level logging configuration: timestamped INFO-level messages.
# Individual functions below log through the shared `logger` instance;
# --verbose later raises the root logger to DEBUG (see main()).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Define models to convert
# Each entry describes one batch-conversion job:
#   input       - ModelScope repository ID of the source CAM++ model
#   output      - repository name to create under TARGET_ORGANIZATION on HF
#   description - human-readable label used in progress/summary logs
#   q2/q4/q8    - which quantized variants (2/4/8-bit) to produce
PRESET_MODELS = [
    {
        "input": "iic/speech_campplus_sv_zh-cn_16k-common",
        "output": "campplus_chinese_16k_common",
        "description": "Chinese (Basic)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
    {
        "input": "iic/speech_campplus_sv_zh_en_16k-common_advanced",
        "output": "campplus_multilingual_16k_advanced",
        "description": "Chinese-English (Advanced)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
]
def parse_args():
    """Build the batch-conversion CLI and parse sys.argv.

    Returns:
        argparse.Namespace with attributes: token (str | None),
        dry_run (bool), verbose (bool).
    """
    cli = argparse.ArgumentParser(description='Batch convert CAM++ models to MLX')
    cli.add_argument('--token',
                     help='HuggingFace API token (or set HF_TOKEN env var)')
    cli.add_argument('--dry-run', action='store_true',
                     help='Test without uploading')
    cli.add_argument('--verbose', action='store_true',
                     help='Enable verbose logging')
    return cli.parse_args()
def get_hf_token(args) -> str:
    """Resolve the HuggingFace token from CLI args or the environment.

    Precedence: --token argument first, then the HF_TOKEN and
    HUGGING_FACE_HUB_TOKEN environment variables. Exits the process
    with status 1 when no token can be found.
    """
    if args.token:
        return args.token
    for env_var in ('HF_TOKEN', 'HUGGING_FACE_HUB_TOKEN'):
        candidate = os.getenv(env_var)
        if candidate:
            return candidate
    logger.error("ERROR: HuggingFace token required")
    logger.error("   Set HF_TOKEN environment variable or use --token argument")
    sys.exit(1)
def convert_model(
    converter: CAMPPConverter,
    input_repo: str,
    output_name: str,
    token: str,
    q2: bool = False,
    q4: bool = True,
    q8: bool = False
) -> bool:
    """Convert one CAM++ model and report whether it succeeded.

    Args:
        converter: CAMPPConverter instance (reused across calls)
        input_repo: ModelScope repository ID
        output_name: Output model name
        token: HuggingFace API token
        q2: Enable 2-bit quantization
        q4: Enable 4-bit quantization
        q8: Enable 8-bit quantization

    Returns:
        True if successful, False otherwise.
    """
    try:
        report = converter.convert_model(
            input_repo=input_repo,
            output_name=output_name,
            hf_token=token,
            quantize_q2=q2,
            quantize_q4=q4,
            quantize_q8=q8
        )
    except Exception as e:
        logger.error(f"Conversion failed: {e}")
        return False
    # Success is signalled by a marker in the returned status text.
    return "β " in report or "Conversion Successful" in report
def main():
    """Main batch conversion function.

    Converts every entry in PRESET_MODELS with a shared CAMPPConverter,
    logs per-model progress and a final summary, then exits with status 0
    when all conversions succeed, 1 otherwise.
    """
    args = parse_args()
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    # Bug fix: resolve the token only when it is actually needed.
    # Previously get_hf_token() ran before the dry-run check, so a
    # --dry-run invocation (documented as "Test without uploading")
    # hard-exited when no HF token was configured, even though the token
    # was immediately replaced with a placeholder.
    if args.dry_run:
        logger.info("π DRY RUN MODE - Will not upload to HuggingFace")
        token = "dry_run_placeholder"
    else:
        token = get_hf_token(args)
    logger.info("=" * 70)
    logger.info("CAM++ MLX Converter - Batch Conversion")
    logger.info("=" * 70)
    logger.info(f"Total models: {len(PRESET_MODELS)}")
    logger.info("=" * 70)
    logger.info("")
    # Create converter instance (reuse for all conversions)
    converter = CAMPPConverter()
    # Track per-model outcomes for the summary section below.
    results = []
    # Convert each model
    for i, model_config in enumerate(PRESET_MODELS, 1):
        logger.info("")
        logger.info("-" * 70)
        logger.info(f"[{i}/{len(PRESET_MODELS)}] Converting: {model_config['description']}")
        logger.info("-" * 70)
        logger.info(f"  Input:  {model_config['input']}")
        logger.info(f"  Output: {TARGET_ORGANIZATION}/{model_config['output']}")
        logger.info(f"  Quant:  Q2={model_config['q2']}, Q4={model_config['q4']}, Q8={model_config['q8']}")
        logger.info("")
        success = convert_model(
            converter=converter,
            input_repo=model_config['input'],
            output_name=model_config['output'],
            token=token,
            q2=model_config['q2'],
            q4=model_config['q4'],
            q8=model_config['q8']
        )
        results.append({
            "description": model_config['description'],
            "input": model_config['input'],
            "output": model_config['output'],
            "success": success
        })
        if success:
            logger.info(f"β Success: {model_config['description']}")
        else:
            logger.error(f"β Failed: {model_config['description']}")
    # Print summary
    logger.info("")
    logger.info("=" * 70)
    logger.info("BATCH CONVERSION SUMMARY")
    logger.info("=" * 70)
    success_count = sum(1 for r in results if r['success'])
    failed_count = len(results) - success_count
    logger.info(f"Total models: {len(results)}")
    logger.info(f"Successful:   {success_count}")
    logger.info(f"Failed:       {failed_count}")
    logger.info("")
    # List results
    for result in results:
        status = "β " if result['success'] else "β"
        logger.info(f"  {status} {result['description']}")
        logger.info(f"     {result['input']} β {TARGET_ORGANIZATION}/{result['output']}")
    logger.info("=" * 70)
    # Exit with appropriate code
    if failed_count == 0:
        logger.info("β All conversions completed successfully!")
        sys.exit(0)
    else:
        logger.warning("β οΈ Some conversions failed. Check logs above for details.")
        sys.exit(1)
# Script entry point: only run the batch when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()