Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Upload trained Learnable-Speech models to Hugging Face Hub""" | |
| import os | |
| import argparse | |
| from huggingface_hub import HfApi, create_repo, upload_file, upload_folder | |
| import torch | |
| import json | |
| from pathlib import Path | |
| def create_model_card(model_name, training_info): | |
| """Create a model card for the uploaded model""" | |
| return f"""--- | |
| license: apache-2.0 | |
| tags: | |
| - text-to-speech | |
| - speech-synthesis | |
| - learnable-speech | |
| - cosyvoice | |
| - pytorch | |
| pipeline_tag: text-to-speech | |
| library_name: pytorch | |
| --- | |
| # Learnable-Speech {model_name.upper()} | |
| This is a trained {model_name} model from the Learnable-Speech project, an unofficial implementation based on improvements of CosyVoice with learnable encoder and DAC-VAE. | |
| ## Model Description | |
| - **Model Type**: {model_name.upper()} ({"Language Model" if model_name == "llm" else "Flow Matching Decoder"}) | |
| - **Architecture**: {"Qwen2-based transformer for BPE→FSQ token mapping" if model_name == "llm" else "Causal conditional flow matching for FSQ→DAC latent mapping"} | |
| - **Sample Rate**: 24kHz | |
| - **Framework**: PyTorch | |
| ## Training Details | |
| {training_info} | |
| ## Usage | |
| ```python | |
| import torch | |
| from learnable_speech import LearnableSpeech | |
| # Load the model | |
| model = LearnableSpeech.from_pretrained("your-username/learnable-speech-{model_name}") | |
| # Generate speech | |
| text = "Hello, this is Learnable-Speech!" | |
| audio = model.synthesize(text) | |
| ``` | |
| ## Citation | |
| If you use this model, please cite: | |
| ```bibtex | |
| @article{{learnable-speech, | |
| title={{Learnable-Speech}}, | |
| author={{Learnable team}}, | |
| year={{2025}}, | |
| url={{https://arxiv.org/pdf/2505.07916}} | |
| }} | |
| ``` | |
| ## Links | |
| - [GitHub Repository](https://github.com/primepake/learnable-speech) | |
| - [Original Paper](https://arxiv.org/pdf/2505.07916) | |
| - [Hugging Face Space Demo](https://huggingface.co/spaces/mnhatdaous/learnable-speech) | |
| """ | |
| def upload_model_to_hf(checkpoint_path, model_name, repo_name, token=None, private=False): | |
| """Upload trained model to Hugging Face Hub""" | |
| api = HfApi(token=token) | |
| # Create repository | |
| try: | |
| create_repo( | |
| repo_id=repo_name, | |
| token=token, | |
| private=private, | |
| exist_ok=True | |
| ) | |
| print(f"✅ Repository {repo_name} created/found") | |
| except Exception as e: | |
| print(f"❌ Failed to create repository: {e}") | |
| return False | |
| # Load checkpoint to get training info | |
| try: | |
| checkpoint = torch.load(checkpoint_path, map_location='cpu') | |
| training_info = f""" | |
| - **Training Steps**: {checkpoint.get('step', 'Unknown')} | |
| - **Training Epochs**: {checkpoint.get('epoch', 'Unknown')} | |
| - **Training Framework**: PyTorch DDP with AMP | |
| - **Optimizer**: AdamW | |
| - **Learning Rate**: {checkpoint.get('lr', 'Unknown')} | |
| """ | |
| except Exception as e: | |
| print(f"⚠️ Could not load checkpoint info: {e}") | |
| training_info = "Training information not available" | |
| # Create model card | |
| model_card = create_model_card(model_name, training_info) | |
| # Save model card to temporary file | |
| with open(f"README_{model_name}.md", "w") as f: | |
| f.write(model_card) | |
| try: | |
| # Upload checkpoint | |
| upload_file( | |
| path_or_fileobj=checkpoint_path, | |
| path_in_repo="pytorch_model.bin", | |
| repo_id=repo_name, | |
| token=token | |
| ) | |
| print(f"✅ Model checkpoint uploaded") | |
| # Upload model card | |
| upload_file( | |
| path_or_fileobj=f"README_{model_name}.md", | |
| path_in_repo="README.md", | |
| repo_id=repo_name, | |
| token=token | |
| ) | |
| print(f"✅ Model card uploaded") | |
| # Create and upload config | |
| config = { | |
| "model_type": "learnable_speech", | |
| "architecture": model_name, | |
| "sample_rate": 24000, | |
| "framework": "pytorch" | |
| } | |
| with open(f"config_{model_name}.json", "w") as f: | |
| json.dump(config, f, indent=2) | |
| upload_file( | |
| path_or_fileobj=f"config_{model_name}.json", | |
| path_in_repo="config.json", | |
| repo_id=repo_name, | |
| token=token | |
| ) | |
| print(f"✅ Config uploaded") | |
| # Cleanup | |
| os.remove(f"README_{model_name}.md") | |
| os.remove(f"config_{model_name}.json") | |
| print(f"🎉 Model successfully uploaded to: https://huggingface.co/{repo_name}") | |
| return True | |
| except Exception as e: | |
| print(f"❌ Failed to upload: {e}") | |
| return False | |
| def main(): | |
| parser = argparse.ArgumentParser(description="Upload Learnable-Speech models to Hugging Face") | |
| parser.add_argument("--checkpoint_dir", required=True, help="Directory containing trained checkpoints") | |
| parser.add_argument("--username", required=True, help="Your Hugging Face username") | |
| parser.add_argument("--token", help="Hugging Face API token (or set HF_TOKEN env var)") | |
| parser.add_argument("--private", action="store_true", help="Make repositories private") | |
| parser.add_argument("--models", nargs="+", choices=["llm", "flow", "both"], default=["both"], | |
| help="Which models to upload") | |
| args = parser.parse_args() | |
| # Get token | |
| token = args.token or os.getenv("HF_TOKEN") | |
| if not token: | |
| print("❌ Please provide Hugging Face token via --token or HF_TOKEN env var") | |
| return | |
| checkpoint_dir = Path(args.checkpoint_dir) | |
| models_to_upload = [] | |
| if "both" in args.models: | |
| models_to_upload = ["llm", "flow"] | |
| else: | |
| models_to_upload = args.models | |
| success_count = 0 | |
| for model_name in models_to_upload: | |
| print(f"\n🚀 Uploading {model_name.upper()} model...") | |
| # Find latest checkpoint | |
| model_dir = checkpoint_dir / model_name | |
| if not model_dir.exists(): | |
| print(f"❌ Model directory not found: {model_dir}") | |
| continue | |
| checkpoint_files = list(model_dir.glob("*.pt")) | |
| if not checkpoint_files: | |
| print(f"❌ No checkpoint files found in {model_dir}") | |
| continue | |
| # Get the latest checkpoint (by modification time) | |
| latest_checkpoint = max(checkpoint_files, key=os.path.getmtime) | |
| print(f"📁 Using checkpoint: {latest_checkpoint}") | |
| # Upload to HF | |
| repo_name = f"{args.username}/learnable-speech-{model_name}" | |
| success = upload_model_to_hf( | |
| checkpoint_path=str(latest_checkpoint), | |
| model_name=model_name, | |
| repo_name=repo_name, | |
| token=token, | |
| private=args.private | |
| ) | |
| if success: | |
| success_count += 1 | |
| print(f"\n🎉 Upload complete! {success_count}/{len(models_to_upload)} models uploaded successfully") | |
| if success_count > 0: | |
| print("\n📝 Next steps:") | |
| print("1. Update your Gradio app to use the uploaded models") | |
| print("2. Test the models in your Hugging Face Space") | |
| print("3. Share your trained models with the community!") | |
| if __name__ == "__main__": | |
| main() | |