#!/usr/bin/env bash
# Environment setup for LLaMA-Omni2 + CosyVoice2: conda env, CUDA toolchain,
# pinned Python dependencies, and model downloads.
# Requires: miniconda at /home/azureuser/miniconda3, CUDA 12.6 installed, sudo.
set -euo pipefail

# --- Conda environment --------------------------------------------------------
# Sourcing conda.sh makes `conda activate` work in a non-interactive script.
source /home/azureuser/miniconda3/etc/profile.d/conda.sh
conda create -n gsva-python310 python=3.10 -y
conda activate gsva-python310

# --- Build prerequisites (needed before the source builds further down) -------
pip install Cython numpy==1.26.4
pip install packaging wheel setuptools==69.5.1

# Install this repository in development (editable) mode
pip install -e .

# --- Core runtime dependencies ------------------------------------------------
pip install huggingface_hub==0.25.1
pip install uvicorn openai-whisper fastapi
pip install hf_transfer
pip install ninja

# --- CUDA environment ---------------------------------------------------------
# Point /usr/local/cuda at CUDA 12.6 so nvcc and the runtime libs resolve.
sudo rm -rf /usr/local/cuda
sudo ln -s /usr/local/cuda-12.6 /usr/local/cuda
export PATH=/usr/local/cuda/bin:$PATH
# ${LD_LIBRARY_PATH:-} avoids an "unbound variable" abort under `set -u`
# when LD_LIBRARY_PATH was not previously set.
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}

pip install gradio==5.3.0 gradio_client==1.4.2

# PyTorch with CUDA support (cu121 wheels)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# flash-attn compiles from source; cap parallel build jobs to limit memory use.
MAX_JOBS=4 pip install flash-attn --no-build-isolation

# Install transformers (specific version required by llama-omni2)
pip install transformers==4.43.4

# Install matcha-tts (required for CosyVoice)
pip install matcha-tts --no-build-isolation

# Install CosyVoice from GitHub
pip install git+https://github.com/FunAudioLLM/CosyVoice.git

# Install additional dependencies
pip install conformer onnxruntime hyperpyyaml==1.2.2 ruamel.yaml

# --- Model downloads ----------------------------------------------------------
# Download LLaMA-Omni2 model
mkdir -p models
huggingface-cli download ICTNLP/LLaMA-Omni2-3B --local-dir models/LLaMA-Omni2-3B
# Alternative if huggingface-cli is not available:
# python -c "
# from huggingface_hub import snapshot_download
# snapshot_download(
#     repo_id='ICTNLP/LLaMA-Omni2-3B',
#     local_dir='models/LLaMA-Omni2-3B',
#     local_dir_use_symlinks=False
# )"

# Download CosyVoice2 model
mkdir -p models/cosyvoice2
python -c "
from huggingface_hub import snapshot_download
import os
os.makedirs('models/cosyvoice2', exist_ok=True)
snapshot_download(
    repo_id='FunAudioLLM/CosyVoice2-0.5B',
    local_dir='models/cosyvoice2',
    local_dir_use_symlinks=False
)
"
# --- Fix the CosyVoice configuration (remove mix_ratio if it causes issues) ---
# Create a backup first
cp models/cosyvoice2/cosyvoice2.yaml models/cosyvoice2/cosyvoice2.yaml.backup
# Copy cosyvoice2.yaml to cosyvoice.yaml (the code looks for this file)
cp models/cosyvoice2/cosyvoice2.yaml models/cosyvoice2/cosyvoice.yaml
# Strip the problematic mix_ratio parameter. The && guard means the original
# file is kept untouched if grep fails (e.g. filter leaves nothing behind).
grep -v "mix_ratio" models/cosyvoice2/cosyvoice.yaml > models/cosyvoice2/cosyvoice_fixed.yaml \
  && mv models/cosyvoice2/cosyvoice_fixed.yaml models/cosyvoice2/cosyvoice.yaml

# Kill any existing Python processes (optional, use with caution)
# ps aux | grep python | grep -E "(controller|model_worker|gradio_web_server)" | awk '{print $2}' | xargs -r kill

# --- Start the services -------------------------------------------------------
echo "Starting controller..."
nohup python -m llama_omni2.serve.controller --host 0.0.0.0 --port 10000 > controller.log 2>&1 &
sleep 5

echo "Starting model worker..."
nohup python -m llama_omni2.serve.model_worker \
  --host 0.0.0.0 \
  --controller http://localhost:10000 \
  --port 40000 \
  --worker http://localhost:40000 \
  --model-path models/LLaMA-Omni2-3B \
  --model-name LLaMA-Omni2-3B > worker.log 2>&1 &
sleep 10

# Print log locations BEFORE launching the web server: the gradio process runs
# in the foreground and blocks, so these lines were previously unreachable
# until the server exited.
echo "All services started. Check logs:"
echo " - Controller: tail -f controller.log"
echo " - Model Worker: tail -f worker.log"
echo " - Web UI: http://localhost:8000"

echo "Starting Gradio web server..."
# Runs in the foreground (Ctrl-C stops only the web server, not the
# backgrounded controller/worker). Try with vocoder first.
python -m llama_omni2.serve.gradio_web_server \
  --controller http://localhost:10000 \
  --port 8000 \
  --vocoder-dir models/cosyvoice2
# If the above fails, run without vocoder:
# python -m llama_omni2.serve.gradio_web_server --controller http://localhost:10000 --port 8000