File size: 3,681 Bytes
778d4b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Setup conda environment for the LLaMA-Omni2 / CosyVoice stack.
# Abort on the first failed command so a broken env setup doesn't cascade
# into dozens of pip installs against the wrong environment.
# (Deliberately not `set -u`: conda's activation hooks reference unset
# variables in some releases and would trip it.)
set -eo pipefail

CONDA_SH=/home/azureuser/miniconda3/etc/profile.d/conda.sh
if [ ! -f "$CONDA_SH" ]; then
  echo "ERROR: conda.sh not found at $CONDA_SH" >&2
  exit 1
fi
source "$CONDA_SH"

ENV_NAME=gsva-python310
# Create the env only when it doesn't already exist, so the script is
# safe to re-run (a second `conda create` on an existing env errors out).
if [ ! -d "/home/azureuser/miniconda3/envs/$ENV_NAME" ]; then
  conda create -n "$ENV_NAME" python=3.10 -y
fi
conda activate "$ENV_NAME"

# Install basic dependencies first: build-time requirements for the
# source builds further down. numpy is pinned to 1.26.4 — presumably to
# stay on the NumPy 1.x ABI for the compiled deps below; confirm before
# bumping.
pip install Cython numpy==1.26.4
pip install packaging wheel setuptools==69.5.1

# Install the package in development mode.
# NOTE(review): assumes the script is run from the repo root of the
# package under development (where setup.py/pyproject.toml lives) —
# confirm the expected working directory.
pip install -e .

# Install core dependencies.
# huggingface_hub is pinned; the model-download steps below rely on its
# snapshot_download API.
pip install huggingface_hub==0.25.1
pip install uvicorn openai-whisper fastapi
# hf_transfer: accelerated HF downloads (only active if
# HF_HUB_ENABLE_HF_TRANSFER is set in the environment).
pip install hf_transfer
# ninja: parallel build backend used by the source builds later in this
# script (e.g. flash-attn).
pip install ninja

# Setup CUDA environment: point /usr/local/cuda at the 12.6 toolkit.
# Guard that the toolkit actually exists before touching the symlink,
# and only rewrite the link when it doesn't already point at 12.6, so
# re-runs are safe and we never `rm -rf` needlessly.
if [ ! -d /usr/local/cuda-12.6 ]; then
  echo "ERROR: CUDA toolkit not found at /usr/local/cuda-12.6" >&2
  exit 1
fi
if [ "$(readlink -f /usr/local/cuda 2>/dev/null || true)" != "/usr/local/cuda-12.6" ]; then
  # /usr/local/cuda may be a real directory (not a symlink), so remove it
  # first; `ln -sfn` alone cannot replace a non-empty directory.
  sudo rm -rf /usr/local/cuda
  sudo ln -s /usr/local/cuda-12.6 /usr/local/cuda
fi
export PATH="/usr/local/cuda/bin:$PATH"
# ${VAR:-} keeps this safe when LD_LIBRARY_PATH starts out unset.
export LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}"

# Gradio UI stack; client version pinned to match the server version.
pip install gradio==5.3.0 gradio_client==1.4.2

# Install PyTorch with CUDA support (cu121 wheel index).
# ORDER MATTERS: torch must be installed before flash-attn, because
# flash-attn is built with --no-build-isolation and its build imports
# torch from the current environment.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Install flash-attn (compiled from source against the torch above).
# MAX_JOBS=4 caps parallel compile jobs — presumably to bound memory
# use during the nvcc build; tune for the build host.
MAX_JOBS=4 pip install flash-attn --no-build-isolation

# Install transformers (specific version required by llama-omni2)
pip install transformers==4.43.4

# Install matcha-tts (required for CosyVoice)
pip install matcha-tts --no-build-isolation

# Install CosyVoice from GitHub
pip install git+https://github.com/FunAudioLLM/CosyVoice.git

# Install additional dependencies (runtime deps of the CosyVoice/worker
# stack; hyperpyyaml pinned).
pip install conformer onnxruntime hyperpyyaml==1.2.2 ruamel.yaml

# Download LLaMA-Omni2 model.
# Prefer the huggingface-cli entry point when it is on PATH; otherwise
# fall back automatically to the Python API (both come from the
# huggingface_hub package installed above) instead of leaving the
# fallback as a commented-out snippet.
mkdir -p models
if command -v huggingface-cli >/dev/null 2>&1; then
  huggingface-cli download ICTNLP/LLaMA-Omni2-3B --local-dir models/LLaMA-Omni2-3B
else
  python -c "
from huggingface_hub import snapshot_download
snapshot_download(
    repo_id='ICTNLP/LLaMA-Omni2-3B',
    local_dir='models/LLaMA-Omni2-3B',
    local_dir_use_symlinks=False
)"
fi

# Download CosyVoice2 model (real files, no symlinks, so the directory
# is self-contained and safe to copy/serve from).
mkdir -p models/cosyvoice2
python -c "
from huggingface_hub import snapshot_download
import os
os.makedirs('models/cosyvoice2', exist_ok=True)
snapshot_download(
    repo_id='FunAudioLLM/CosyVoice2-0.5B',
    local_dir='models/cosyvoice2',
    local_dir_use_symlinks=False
)
"

# Fix the CosyVoice configuration (remove mix_ratio if it causes issues).
# Guard on the source yaml existing — the download step above may have
# failed or the repo layout may change — instead of letting `cp` blow up.
CFG_SRC=models/cosyvoice2/cosyvoice2.yaml
CFG_DST=models/cosyvoice2/cosyvoice.yaml
if [ -f "$CFG_SRC" ]; then
  # Create a backup first
  cp -- "$CFG_SRC" "$CFG_SRC.backup"
  # Copy cosyvoice2.yaml to cosyvoice.yaml (the code looks for this file)
  cp -- "$CFG_SRC" "$CFG_DST"
  # Drop the problematic mix_ratio parameter in place. sed -i replaces
  # the grep -v > tmp && mv shuffle, and unlike `grep -v` it exits 0
  # even when every line (or no line) matches.
  sed -i '/mix_ratio/d' -- "$CFG_DST"
else
  echo "WARN: $CFG_SRC not found; skipping CosyVoice config fix" >&2
fi

# Kill any existing Python processes (optional, use with caution)
# pgrep -f "llama_omni2.serve.(controller|model_worker|gradio_web_server)" | xargs -r kill

# Start the services. Each background service is liveness-checked after
# its warm-up sleep, so a process that died immediately is reported here
# rather than surfacing later as an opaque connection error.
echo "Starting controller..."
nohup python -m llama_omni2.serve.controller --host 0.0.0.0 --port 10000 > controller.log 2>&1 &
controller_pid=$!
sleep 5
if ! kill -0 "$controller_pid" 2>/dev/null; then
  echo "ERROR: controller failed to start; see controller.log" >&2
  exit 1
fi

echo "Starting model worker..."
nohup python -m llama_omni2.serve.model_worker \
  --host 0.0.0.0 \
  --controller http://localhost:10000 \
  --port 40000 \
  --worker http://localhost:40000 \
  --model-path models/LLaMA-Omni2-3B \
  --model-name LLaMA-Omni2-3B > worker.log 2>&1 &
worker_pid=$!
sleep 10
if ! kill -0 "$worker_pid" 2>/dev/null; then
  echo "ERROR: model worker failed to start; see worker.log" >&2
  exit 1
fi

# Print the log locations BEFORE launching the web server: Gradio runs
# in the foreground, so anything echoed after it would only appear once
# the server exits (the original script had these echoes at the end).
echo "All services started. Check logs:"
echo "  - Controller: tail -f controller.log"
echo "  - Model Worker: tail -f worker.log"
echo "  - Web UI: http://localhost:8000"

echo "Starting Gradio web server..."
# Try with vocoder first
python -m llama_omni2.serve.gradio_web_server \
  --controller http://localhost:10000 \
  --port 8000 \
  --vocoder-dir models/cosyvoice2

# If the above fails, run without vocoder:
# python -m llama_omni2.serve.gradio_web_server --controller http://localhost:10000 --port 8000