Aryanaideveloper's picture
feat: initial project setup with audio/video detection modules and fusion
5cc48fc
#!/bin/bash
# Setup script for Video Detection (GenConViT) environment
# Run this from the project root: Multimodal Deepfake Detection/
#
# What it does, in order:
#   1. Creates a conda environment named "deepfake-video" with Python 3.10.
#   2. Activates that environment inside this script.
#   3. Installs PyTorch (CUDA 12.1 wheels) and the GenConViT dependencies.
#   4. Installs dlib (pip first, conda-forge as a fallback) and face-recognition.
#
# Requires: conda available on PATH, and network access.

# Stop at the first failing command so a broken install is never reported as
# "Setup Complete". `-u` is deliberately left off: activation scripts shipped
# by some conda packages read unset variables and would abort under it.
set -eo pipefail

env_name="deepfake-video"

echo "=== Setting up Video Detection Environment ==="

# Fail early with a readable message instead of a bare "command not found".
if ! command -v conda >/dev/null 2>&1; then
  echo "Error: conda was not found on PATH. Install Miniconda or Anaconda first." >&2
  exit 1
fi

# Create conda environment
conda create -n "${env_name}" python=3.10 -y

# `conda activate` is a shell function that only exists after conda's shell
# hook has been evaluated. Non-interactive scripts do not load it
# automatically, so without this line the activation fails and every package
# below lands in whichever Python happens to be first on PATH.
eval "$(conda shell.bash hook)"
conda activate "${env_name}"

# Install PyTorch with CUDA support
# `python -m pip` guarantees pip runs under the interpreter of the
# environment that was just activated.
python -m pip install torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/cu121

# Install GenConViT dependencies
python -m pip install opencv-python albumentations==1.3.0 timm==0.6.5
python -m pip install decord==0.6.0
python -m pip install pyyaml tqdm numpy

# Install dlib and face-recognition
# On Windows, dlib can be tricky. Try pip first, if that fails use conda.
echo "=== Installing dlib (this may take a while) ==="
python -m pip install cmake
if ! python -m pip install dlib; then
  echo "pip could not build dlib; falling back to the conda-forge package." >&2
  conda install -n "${env_name}" -c conda-forge dlib -y
fi
python -m pip install face-recognition==1.3.0

echo ""
echo "=== Setup Complete ==="
echo "Next steps:"
echo "1. Download weights: bash scripts/download_weights.sh"
echo "2. Run inference: cd video_detection/GenConViT && python prediction.py --p <video_path> --e --v --f 15 --fp16"