#!/usr/bin/env bash
# Setup script for the Audio Detection (Nes2Net) environment.
# Run this from the project root: Multimodal Deepfake Detection/
#
# Creates the conda env, installs PyTorch + audio dependencies, and clones
# the fairseq commit required by the wav2vec2 frontend.
set -euo pipefail

readonly ENV_NAME="deepfake-audio"
# Specific fairseq commit used by the authors (required for wav2vec2 frontend).
readonly FAIRSEQ_COMMIT="a54021305d6b3c4c5959ac9395135f63202db8f1"

echo "=== Setting up Audio Detection Environment ==="

# Create conda environment.
# Using Python 3.9 for better compatibility (asvspoof5 branch approach).
conda create -n "$ENV_NAME" python=3.9 -y

# 'conda activate' is only defined in interactive shells by default; hook the
# conda shell integration into this non-interactive script first, otherwise
# activation fails and every pip install below lands in the wrong environment.
eval "$(conda shell.bash hook)"
conda activate "$ENV_NAME"

# Install PyTorch with CUDA support (adjust CUDA version for your GPU).
# NOTE(review): these cu117 wheels predate CUDA 12.x-only GPUs (e.g. RTX 5050);
# on such hardware a newer torch build is likely required — confirm before use.
pip install torch==1.13.1+cu117 torchaudio==0.13.1+cu117 torchvision==0.14.1+cu117 \
  -f https://download.pytorch.org/whl/torch_stable.html

# Install core dependencies (versions pinned for fairseq/s3prl compatibility).
pip install librosa==0.9.1 soundfile==0.12.1 numpy==1.23.5 scipy==1.9.3
pip install transformers==4.30.2 s3prl==0.4.15
pip install tqdm scikit-learn pandas

# Install fairseq (required for wav2vec2 frontend) at the authors' commit.
echo "=== Installing fairseq ==="
cd audio_detection || { echo "error: audio_detection/ not found — run from the project root" >&2; exit 1; }
if [ ! -d "fairseq" ]; then
  git clone https://github.com/facebookresearch/fairseq.git
  cd fairseq
  git checkout "$FAIRSEQ_COMMIT"
  pip install --editable ./
  cd ..
else
  echo "fairseq already exists, skipping clone"
fi

echo ""
echo "=== Setup Complete ==="
echo "Next steps:"
echo "1. Download Nes2Net checkpoint from Google Drive (see README in Nes2Net_ASVspoof_ITW)"
echo "2. Download wav2vec 2.0 XLSR model (will auto-download on first run via fairseq)"
echo "3. Run inference: python Nes2Net_ASVspoof_ITW/easy_inference_demo.py --model_path <checkpoint> --file_to_test <audio_file> --model_name wav2vec2_Nes2Net_X"