# Configuration for Cog: describes the container build environment and the
# inference entry point.
build:
  # Use a GPU because PULSE relies on a vision tower and GPU acceleration.
  gpu: true
  # Quoted so the version stays a string instead of being parsed as a float.
  python_version: "3.11"
  python_packages:
    # Core libraries
    - "torch==2.1.2"
    - "torchvision==0.16.2"
    - "transformers==4.37.2"
    - "tokenizers==0.15.1"
    - "sentencepiece==0.1.99"
    - "safetensors>=0.4.2"
    # NOTE(review): LLaVA's own package metadata may pin a different
    # accelerate version at the tag used below; if pip reports a resolution
    # conflict, align this pin with LLaVA's. TODO confirm.
    - "accelerate==0.30.0"
    # Vision and utility libraries
    - "timm==0.6.13"
    - "pillow>=9.5.0"
    - "requests>=2.31.0"
    # torch 2.1.x wheels are built against the NumPy 1.x ABI, so keep NumPy
    # below 2.0 to avoid import-time incompatibilities.
    - "numpy<2"
    - "einops==0.6.1"
    - "einops-exts==0.0.4"
    # transformers 4.37.2 requires huggingface-hub>=0.19.3,<1.0 (and
    # tokenizers 0.15.1 requires >=0.16.4); an older exact pin makes the
    # dependency set unresolvable.
    - "huggingface_hub>=0.19.3,<1.0"
    # LLaVA library pulled from GitHub at a fixed tag
    - "llava @ git+https://github.com/haotian-liu/LLaVA@v1.2.0"

# The predictor defines how to run inference, as "<file>:<class>".
predict: "predict.py:Predictor"