| faiss-gpu>=1.7.2 | |
| # onnxruntime-gpu is needed by the unstructured package | |
| onnxruntime-gpu==1.15.0 | |
| auto-gptq>=0.7.1 | |
| #optimum>=1.17.1 | |
| # The default autoawq package targets CUDA 12.1; for other CUDA versions, build from source: https://github.com/casper-hansen/AutoAWQ?tab=readme-ov-file#build-from-source | |
| autoawq | |
| autoawq-kernels | |
| exllama @ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl | |
| # flash-attn is intentionally left disabled; see https://github.com/Dao-AILab/flash-attention/issues/453 | |
| # flash-attn==2.4.2 | |