#!/bin/bash
# Setup script for the Audio Detection (Nes2Net) environment.
# Run this from the project root: "Multimodal Deepfake Detection/".
#
# Steps: create a conda env, install pinned PyTorch + audio dependencies,
# then clone/install the exact fairseq commit required by the wav2vec2 frontend.

# Fail fast: abort on any error, unset variable, or failed pipeline stage.
set -euo pipefail

echo "=== Setting up Audio Detection Environment ==="

# Create conda environment.
# Using Python 3.9 for better compatibility (asvspoof5 branch approach).
conda create -n deepfake-audio python=3.9 -y

# 'conda activate' is not available in non-interactive shells until the conda
# shell hook is sourced; without this line the activation silently fails and
# every subsequent pip install lands in the wrong (base) environment.
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate deepfake-audio

# Install PyTorch with CUDA support (adjust CUDA version for your GPU).
# NOTE(review): these cu117 wheels (torch 1.13.1) predate RTX 50-series /
# CUDA 12.x GPUs — confirm they actually run on an RTX 5050, or move the
# pins to a cu12x build.
pip install torch==1.13.1+cu117 torchaudio==0.13.1+cu117 torchvision==0.14.1+cu117 \
  -f https://download.pytorch.org/whl/torch_stable.html

# Install core dependencies (versions pinned to match the authors' setup).
pip install librosa==0.9.1 soundfile==0.12.1 numpy==1.23.5 scipy==1.9.3
pip install transformers==4.30.2 s3prl==0.4.15
pip install tqdm scikit-learn pandas

# Install fairseq (required for the wav2vec2 frontend) at the specific
# commit used by the authors.
echo "=== Installing fairseq ==="
# Guard the cd: if it fails, the clone below would land in the wrong directory.
cd audio_detection || { echo "audio_detection/ not found — run from the project root" >&2; exit 1; }
if [ ! -d "fairseq" ]; then
  git clone https://github.com/facebookresearch/fairseq.git
  cd fairseq
  git checkout a54021305d6b3c4c5959ac9395135f63202db8f1
  pip install --editable ./
  cd ..
else
  echo "fairseq already exists, skipping clone"
fi

echo ""
echo "=== Setup Complete ==="
echo "Next steps:"
echo "1. Download Nes2Net checkpoint from Google Drive (see README in Nes2Net_ASVspoof_ITW)"
echo "2. Download wav2vec 2.0 XLSR model (will auto-download on first run via fairseq)"
echo "3. Run inference: python Nes2Net_ASVspoof_ITW/easy_inference_demo.py --model_path --file_to_test --model_name wav2vec2_Nes2Net_X"