Spaces:

mnhatdaous
/

learnable-speech

Sleeping

App Files Files Community

learnable-speech / scripts /train_full_pipeline.sh

mnhatdaous

Add comprehensive training pipeline for Hugging Face deployment

248479c 3 months ago

raw

history blame

3.72 kB

	#!/bin/bash

	# Complete Learnable-Speech Training Pipeline
	# This script trains both LLM and Flow models sequentially

	set -e # Exit on any error

	echo "🎤 Starting Learnable-Speech Training Pipeline"
	echo "=============================================="

	# Configuration
	DATASET_ROOT="${DATASET_ROOT:-/data/dataset}"
	CHECKPOINT_DIR="${CHECKPOINT_DIR:-./checkpoints}"
	PRETRAINED_DIR="${PRETRAINED_DIR:-./pretrained_models/CosyVoice2-0.5B}"
	NUM_GPUS="${NUM_GPUS:-4}"
	BATCH_SIZE="${BATCH_SIZE:-32}"

	# Create checkpoint directories
	mkdir -p $CHECKPOINT_DIR/{llm,flow}

	# Check prerequisites
	echo "📋 Checking prerequisites..."
	if [ ! -d "$PRETRAINED_DIR" ]; then
	echo "❌ Pretrained models not found. Please run scripts/download_pretrained.sh first"
	exit 1
	fi

	if [ ! -f "./data/train.list" ]; then
	echo "❌ Training data not found. Please run scripts/prepare_data.sh first"
	exit 1
	fi

	# Set environment
	export CUDA_VISIBLE_DEVICES="0,1,2,3" # Adjust as needed
	export PYTHONPATH=$(pwd):$PYTHONPATH

	echo "🚀 Starting Stage 1: LLM Training (BPE → FSQ tokens)"
	echo "=================================================="

	torchrun --nnodes=1 --nproc_per_node=$NUM_GPUS --rdzv_id=1986 --rdzv_backend="c10d" --rdzv_endpoint="localhost:1234" \
	speech/train.py \
	--train_engine torch_ddp \
	--config speech/config.yaml \
	--train_data ./data/train.list \
	--cv_data ./data/val.list \
	--qwen_pretrain_path $PRETRAINED_DIR/CosyVoice-BlankEN \
	--model llm \
	--model_dir $CHECKPOINT_DIR/llm/ \
	--num_workers 24 \
	--prefetch 100 \
	--use_amp \
	--pretrained_model $PRETRAINED_DIR/llm.pt \
	--comet_project "learnable-speech" \
	--comet_experiment_name "llm-training-$(date +%Y%m%d-%H%M%S)"

	if [ $? -eq 0 ]; then
	echo "✅ Stage 1 (LLM) training completed successfully!"
	else
	echo "❌ Stage 1 (LLM) training failed!"
	exit 1
	fi

	echo "🚀 Starting Stage 2: Flow Training (FSQ → DAC latents)"
	echo "====================================================="

	# Find the latest LLM checkpoint
	LATEST_LLM_CHECKPOINT=$(ls -t $CHECKPOINT_DIR/llm/*.pt \| head -1)
	echo "Using LLM checkpoint: $LATEST_LLM_CHECKPOINT"

	torchrun --nnodes=1 --nproc_per_node=$NUM_GPUS --rdzv_id=1987 --rdzv_backend="c10d" --rdzv_endpoint="localhost:1235" \
	speech/train.py \
	--train_engine torch_ddp \
	--config speech/config.yaml \
	--train_data ./data/train.list \
	--cv_data ./data/val.list \
	--qwen_pretrain_path $PRETRAINED_DIR/CosyVoice-BlankEN \
	--model flow \
	--model_dir $CHECKPOINT_DIR/flow/ \
	--num_workers 24 \
	--prefetch 100 \
	--use_amp \
	--pretrained_model $PRETRAINED_DIR/flow.pt \
	--comet_project "learnable-speech" \
	--comet_experiment_name "flow-training-$(date +%Y%m%d-%H%M%S)"

	if [ $? -eq 0 ]; then
	echo "✅ Stage 2 (Flow) training completed successfully!"
	else
	echo "❌ Stage 2 (Flow) training failed!"
	exit 1
	fi

	echo "🎉 Training pipeline completed successfully!"
	echo "=========================================="
	echo "Trained models saved in: $CHECKPOINT_DIR"
	echo ""
	echo "Next steps:"
	echo "1. Test your models with inference scripts"
	echo "2. Upload checkpoints to Hugging Face Hub"
	echo "3. Update the Gradio app with trained models"

	# Create a summary file
	cat > $CHECKPOINT_DIR/training_summary.txt << EOF
	Learnable-Speech Training Summary
	Generated: $(date)

	Dataset: $DATASET_ROOT
	LLM Checkpoint: $(ls -t $CHECKPOINT_DIR/llm/*.pt \| head -1)
	Flow Checkpoint: $(ls -t $CHECKPOINT_DIR/flow/*.pt \| head -1)

	Configuration:
	- GPUs: $NUM_GPUS
	- Batch Size: $BATCH_SIZE
	- Mixed Precision: Enabled
	- Framework: PyTorch DDP

	Training completed successfully!
	EOF

	echo "📄 Training summary saved to: $CHECKPOINT_DIR/training_summary.txt"