# learnable-speech/scripts/upload_to_hf.py
#!/usr/bin/env python3
"""Upload trained Learnable-Speech models to Hugging Face Hub"""
import os
import argparse
from huggingface_hub import HfApi, create_repo, upload_file, upload_folder
import torch
import json
from pathlib import Path
def create_model_card(model_name, training_info):
    """Build the Markdown model card that is uploaded as README.md.

    Args:
        model_name: Either "llm" or "flow"; selects the per-model description.
        training_info: Pre-formatted Markdown block with training details.

    Returns:
        The complete model card (YAML front matter + Markdown body) as one string.
    """
    # Hoist the per-model text out of the literal so the template stays readable.
    is_llm = model_name == "llm"
    model_type = "Language Model" if is_llm else "Flow Matching Decoder"
    architecture = (
        "Qwen2-based transformer for BPE→FSQ token mapping"
        if is_llm
        else "Causal conditional flow matching for FSQ→DAC latent mapping"
    )
    return f"""---
license: apache-2.0
tags:
- text-to-speech
- speech-synthesis
- learnable-speech
- cosyvoice
- pytorch
pipeline_tag: text-to-speech
library_name: pytorch
---
# Learnable-Speech {model_name.upper()}
This is a trained {model_name} model from the Learnable-Speech project, an unofficial implementation based on improvements of CosyVoice with learnable encoder and DAC-VAE.
## Model Description
- **Model Type**: {model_name.upper()} ({model_type})
- **Architecture**: {architecture}
- **Sample Rate**: 24kHz
- **Framework**: PyTorch
## Training Details
{training_info}
## Usage
```python
import torch
from learnable_speech import LearnableSpeech
# Load the model
model = LearnableSpeech.from_pretrained("your-username/learnable-speech-{model_name}")
# Generate speech
text = "Hello, this is Learnable-Speech!"
audio = model.synthesize(text)
```
## Citation
If you use this model, please cite:
```bibtex
@article{{learnable-speech,
title={{Learnable-Speech}},
author={{Learnable team}},
year={{2025}},
url={{https://arxiv.org/pdf/2505.07916}}
}}
```
## Links
- [GitHub Repository](https://github.com/primepake/learnable-speech)
- [Original Paper](https://arxiv.org/pdf/2505.07916)
- [Hugging Face Space Demo](https://huggingface.co/spaces/mnhatdaous/learnable-speech)
"""
def upload_model_to_hf(checkpoint_path, model_name, repo_name, token=None, private=False):
    """Upload a trained checkpoint, model card, and config to the Hugging Face Hub.

    Args:
        checkpoint_path: Path to the ``.pt`` checkpoint file to upload.
        model_name: Either "llm" or "flow"; used in the card text and temp file names.
        repo_name: Target repo id, e.g. "user/learnable-speech-llm".
        token: Hugging Face API token; when None, huggingface_hub falls back to
            its ambient credential resolution.
        private: Whether to create the repository as private.

    Returns:
        True when every upload succeeded, False otherwise (errors are printed,
        never raised).
    """
    # Create (or reuse) the target repository.
    try:
        create_repo(
            repo_id=repo_name,
            token=token,
            private=private,
            exist_ok=True
        )
        print(f"✅ Repository {repo_name} created/found")
    except Exception as e:
        print(f"❌ Failed to create repository: {e}")
        return False

    # Best-effort: read training metadata out of the checkpoint for the card.
    # NOTE(review): torch.load unpickles arbitrary objects — only run this on
    # checkpoints you trust. weights_only=True would be safer but is left off
    # to stay compatible with older torch versions that lack the parameter.
    try:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        training_info = f"""
- **Training Steps**: {checkpoint.get('step', 'Unknown')}
- **Training Epochs**: {checkpoint.get('epoch', 'Unknown')}
- **Training Framework**: PyTorch DDP with AMP
- **Optimizer**: AdamW
- **Learning Rate**: {checkpoint.get('lr', 'Unknown')}
"""
    except Exception as e:
        print(f"⚠️ Could not load checkpoint info: {e}")
        training_info = "Training information not available"

    model_card = create_model_card(model_name, training_info)

    # Temp files written locally, uploaded, then always removed (see finally).
    readme_path = Path(f"README_{model_name}.md")
    config_path = Path(f"config_{model_name}.json")
    try:
        # encoding is pinned: the card contains non-ASCII characters (→) that
        # would break under a non-UTF-8 platform default encoding.
        readme_path.write_text(model_card, encoding="utf-8")

        # Upload checkpoint under the conventional filename.
        upload_file(
            path_or_fileobj=checkpoint_path,
            path_in_repo="pytorch_model.bin",
            repo_id=repo_name,
            token=token
        )
        print("✅ Model checkpoint uploaded")

        # Upload model card.
        upload_file(
            path_or_fileobj=str(readme_path),
            path_in_repo="README.md",
            repo_id=repo_name,
            token=token
        )
        print("✅ Model card uploaded")

        # Create and upload a minimal config.
        config = {
            "model_type": "learnable_speech",
            "architecture": model_name,
            "sample_rate": 24000,
            "framework": "pytorch"
        }
        config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")
        upload_file(
            path_or_fileobj=str(config_path),
            path_in_repo="config.json",
            repo_id=repo_name,
            token=token
        )
        print("✅ Config uploaded")

        print(f"🎉 Model successfully uploaded to: https://huggingface.co/{repo_name}")
        return True
    except Exception as e:
        print(f"❌ Failed to upload: {e}")
        return False
    finally:
        # Clean up even when an upload fails — the original version leaked the
        # temp files on every failure path.
        readme_path.unlink(missing_ok=True)
        config_path.unlink(missing_ok=True)
def main():
    """CLI entry point: find the newest checkpoint per model and push it to the Hub."""
    parser = argparse.ArgumentParser(description="Upload Learnable-Speech models to Hugging Face")
    parser.add_argument("--checkpoint_dir", required=True, help="Directory containing trained checkpoints")
    parser.add_argument("--username", required=True, help="Your Hugging Face username")
    parser.add_argument("--token", help="Hugging Face API token (or set HF_TOKEN env var)")
    parser.add_argument("--private", action="store_true", help="Make repositories private")
    parser.add_argument("--models", nargs="+", choices=["llm", "flow", "both"], default=["both"],
                        help="Which models to upload")
    args = parser.parse_args()

    # Resolve the API token from the CLI flag, falling back to the environment.
    token = args.token or os.getenv("HF_TOKEN")
    if not token:
        print("❌ Please provide Hugging Face token via --token or HF_TOKEN env var")
        return

    checkpoint_dir = Path(args.checkpoint_dir)
    # "both" expands to the two concrete model names.
    selected = ["llm", "flow"] if "both" in args.models else args.models

    uploaded = 0
    for name in selected:
        print(f"\n🚀 Uploading {name.upper()} model...")

        model_dir = checkpoint_dir / name
        if not model_dir.exists():
            print(f"❌ Model directory not found: {model_dir}")
            continue

        candidates = list(model_dir.glob("*.pt"))
        if not candidates:
            print(f"❌ No checkpoint files found in {model_dir}")
            continue

        # The most recently modified checkpoint is taken as "latest".
        newest = max(candidates, key=os.path.getmtime)
        print(f"📁 Using checkpoint: {newest}")

        if upload_model_to_hf(
            checkpoint_path=str(newest),
            model_name=name,
            repo_name=f"{args.username}/learnable-speech-{name}",
            token=token,
            private=args.private,
        ):
            uploaded += 1

    print(f"\n🎉 Upload complete! {uploaded}/{len(selected)} models uploaded successfully")
    if uploaded > 0:
        print("\n📝 Next steps:")
        print("1. Update your Gradio app to use the uploaded models")
        print("2. Test the models in your Hugging Face Space")
        print("3. Share your trained models with the community!")
# Run the CLI only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()