# campp-mlx-converter / batch_convert.py
# Author: BMP
# feat: Add batch conversion scripts for CAM++ models (commit 656e7f6)
#!/usr/bin/env python3
"""
Batch conversion script for multiple CAM++ models
This script demonstrates how to programmatically convert multiple models
without using the Gradio UI or CLI wrapper.
Usage:
export HF_TOKEN=your_token_here
python batch_convert.py
Or:
python batch_convert.py --token YOUR_HF_TOKEN
"""
import os
import sys
import argparse
from typing import List, Tuple
import logging
from app import CAMPPConverter, TARGET_ORGANIZATION
# Set up logging
# Timestamped, level-tagged messages for all batch-conversion output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Define models to convert
# Each entry drives one call to convert_model():
#   input       - source repository ID (presumably ModelScope, per the
#                 convert_model docstring below)
#   output      - output model name, published under TARGET_ORGANIZATION
#   description - human-readable label used in progress logs
#   q2/q4/q8    - which quantized variants to produce (forwarded as
#                 quantize_q2/quantize_q4/quantize_q8 to the converter)
PRESET_MODELS = [
    {
        "input": "iic/speech_campplus_sv_zh-cn_16k-common",
        "output": "campplus_chinese_16k_common",
        "description": "Chinese (Basic)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
    {
        "input": "iic/speech_campplus_sv_zh_en_16k-common_advanced",
        "output": "campplus_multilingual_16k_advanced",
        "description": "Chinese-English (Advanced)",
        "q2": False,
        "q4": True,
        "q8": False,
    },
]
def parse_args():
    """Build the command-line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Batch convert CAM++ models to MLX')
    # Token may alternatively come from the environment; see get_hf_token().
    parser.add_argument('--token', help='HuggingFace API token (or set HF_TOKEN env var)')
    parser.add_argument('--dry-run', action='store_true', help='Test without uploading')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose logging')
    return parser.parse_args()
def get_hf_token(args) -> str:
    """Resolve the HuggingFace token from CLI args or the environment.

    Precedence: --token argument, then HF_TOKEN, then
    HUGGING_FACE_HUB_TOKEN. Exits the process with status 1 when no
    token can be found.
    """
    if args.token:
        return args.token
    for var in ('HF_TOKEN', 'HUGGING_FACE_HUB_TOKEN'):
        value = os.getenv(var)
        if value:
            return value
    logger.error("ERROR: HuggingFace token required")
    logger.error(" Set HF_TOKEN environment variable or use --token argument")
    sys.exit(1)
def convert_model(
    converter: CAMPPConverter,
    input_repo: str,
    output_name: str,
    token: str,
    q2: bool = False,
    q4: bool = True,
    q8: bool = False
) -> bool:
    """
    Convert a single model.

    Args:
        converter: CAMPPConverter instance
        input_repo: ModelScope repository ID
        output_name: Output model name
        token: HuggingFace API token
        q2: Enable 2-bit quantization
        q4: Enable 4-bit quantization
        q8: Enable 8-bit quantization

    Returns:
        True if successful, False otherwise
    """
    try:
        report = converter.convert_model(
            input_repo=input_repo,
            output_name=output_name,
            hf_token=token,
            quantize_q2=q2,
            quantize_q4=q4,
            quantize_q8=q8
        )
    except Exception as e:
        logger.error(f"Conversion failed: {e}")
        return False
    # The converter reports status as a human-readable string; treat the
    # presence of either success marker as a successful conversion.
    return "βœ…" in report or "Conversion Successful" in report
def main():
    """Run the batch conversion over PRESET_MODELS and print a summary.

    Exits with status 0 when every conversion succeeds, 1 otherwise.
    """
    args = parse_args()
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Get token
    token = get_hf_token(args)
    if args.dry_run:
        # NOTE(review): the placeholder token is still passed to the
        # converter; whether uploads are actually skipped depends on
        # CAMPPConverter's handling — confirm against app.py.
        logger.info("πŸ” DRY RUN MODE - Will not upload to HuggingFace")
        token = "dry_run_placeholder"

    banner = "=" * 70
    divider = "-" * 70
    total = len(PRESET_MODELS)

    logger.info(banner)
    logger.info("CAM++ MLX Converter - Batch Conversion")
    logger.info(banner)
    logger.info(f"Total models: {total}")
    logger.info(banner)
    logger.info("")

    # A single converter instance is shared across all conversions.
    converter = CAMPPConverter()

    results = []
    for idx, cfg in enumerate(PRESET_MODELS, 1):
        logger.info("")
        logger.info(divider)
        logger.info(f"[{idx}/{total}] Converting: {cfg['description']}")
        logger.info(divider)
        logger.info(f" Input: {cfg['input']}")
        logger.info(f" Output: {TARGET_ORGANIZATION}/{cfg['output']}")
        logger.info(f" Quant: Q2={cfg['q2']}, Q4={cfg['q4']}, Q8={cfg['q8']}")
        logger.info("")

        ok = convert_model(
            converter=converter,
            input_repo=cfg['input'],
            output_name=cfg['output'],
            token=token,
            q2=cfg['q2'],
            q4=cfg['q4'],
            q8=cfg['q8'],
        )
        results.append({
            "description": cfg['description'],
            "input": cfg['input'],
            "output": cfg['output'],
            "success": ok,
        })
        if ok:
            logger.info(f"βœ… Success: {cfg['description']}")
        else:
            logger.error(f"❌ Failed: {cfg['description']}")

    # Print summary
    logger.info("")
    logger.info(banner)
    logger.info("BATCH CONVERSION SUMMARY")
    logger.info(banner)
    succeeded = sum(1 for r in results if r['success'])
    failed = len(results) - succeeded
    logger.info(f"Total models: {len(results)}")
    logger.info(f"Successful: {succeeded}")
    logger.info(f"Failed: {failed}")
    logger.info("")

    # List per-model outcomes
    for entry in results:
        marker = "βœ…" if entry['success'] else "❌"
        logger.info(f" {marker} {entry['description']}")
        logger.info(f" {entry['input']} β†’ {TARGET_ORGANIZATION}/{entry['output']}")
    logger.info(banner)

    # Exit with appropriate code
    if failed == 0:
        logger.info("βœ… All conversions completed successfully!")
        sys.exit(0)
    logger.warning("⚠️ Some conversions failed. Check logs above for details.")
    sys.exit(1)
# Script entry point: run the batch conversion when executed directly.
if __name__ == "__main__":
    main()