# NOTE(review): the three lines below this comment block were GitHub web-UI
# residue captured during extraction (author avatar text, commit subject,
# short hash). They are kept as commented-out provenance so the script parses;
# ideally they should be removed entirely so the shebang is the file's first line.
# Author: Aryanaideveloper
# commit 5cc48fc — feat: initial project setup with audio/video detection modules and fusion
#!/bin/bash
# Setup script for Audio Detection (Nes2Net) environment.
# Run this from the project root: Multimodal Deepfake Detection/
#
# Creates the `deepfake-audio` conda env, installs a pinned PyTorch/CUDA
# stack plus audio dependencies (librosa, s3prl, transformers, ...).
set -euo pipefail   # abort on any failed step, unset var, or failed pipe stage

echo "=== Setting up Audio Detection Environment ==="

# `conda activate` only works in a non-interactive script after the shell
# hook is sourced; without this line the script dies with
# "CommandNotFoundError: Your shell has not been properly configured".
eval "$(conda shell.bash hook)"

# Create conda environment
# Using Python 3.9 for better compatibility (asvspoof5 branch approach)
conda create -n deepfake-audio python=3.9 -y
conda activate deepfake-audio

# Install PyTorch with CUDA support (adjust CUDA version for your GPU)
# NOTE(review): these wheels are built for CUDA 11.7 (+cu117), but the
# comment below claims the target GPU needs CUDA 12.x — a Blackwell-class
# RTX 5050 will not run cu117 binaries. Confirm the intended GPU/toolkit
# pairing before relying on GPU inference.
# RTX 5050 supports CUDA 12.x
pip install torch==1.13.1+cu117 torchaudio==0.13.1+cu117 torchvision==0.14.1+cu117 \
  -f https://download.pytorch.org/whl/torch_stable.html

# Install core dependencies (versions pinned to match the authors' setup)
pip install librosa==0.9.1 soundfile==0.12.1 numpy==1.23.5 scipy==1.9.3
pip install transformers==4.30.2 s3prl==0.4.15
pip install tqdm scikit-learn pandas
# Install fairseq (required for the wav2vec2 frontend).
# The authors pin a specific commit, so clone and check out that exact revision.
echo "=== Installing fairseq ==="

# Guard the cd: if audio_detection/ is missing (script run from the wrong
# directory), the original would have cloned fairseq into the wrong place.
cd audio_detection || { echo "ERROR: audio_detection/ not found — run this script from the project root" >&2; exit 1; }

if [ ! -d "fairseq" ]; then
  git clone https://github.com/facebookresearch/fairseq.git || { echo "ERROR: git clone of fairseq failed" >&2; exit 1; }
  cd fairseq || exit 1
  # Pin to the commit used by the authors so the wav2vec2 frontend behaves
  # reproducibly regardless of upstream fairseq changes.
  git checkout a54021305d6b3c4c5959ac9395135f63202db8f1 || { echo "ERROR: could not check out pinned fairseq commit" >&2; exit 1; }
  pip install --editable ./ || { echo "ERROR: editable install of fairseq failed" >&2; exit 1; }
  cd ..
else
  # NOTE: an existing directory is assumed to be a completed install; a
  # half-finished earlier run should be removed manually before re-running.
  echo "fairseq already exists, skipping clone"
fi
# Final summary: manual follow-up steps the user must do after setup.
# Quoted delimiter ('EOF') keeps every line literal — no expansion.
cat <<'EOF'

=== Setup Complete ===
Next steps:
1. Download Nes2Net checkpoint from Google Drive (see README in Nes2Net_ASVspoof_ITW)
2. Download wav2vec 2.0 XLSR model (will auto-download on first run via fairseq)
3. Run inference: python Nes2Net_ASVspoof_ITW/easy_inference_demo.py --model_path <checkpoint> --file_to_test <audio_file> --model_name wav2vec2_Nes2Net_X
EOF