# pip requirements file: one requirement specifier or option per line.
# Install with: pip install -r <this file>

# General application dependencies
fastapi>=0.111
uvicorn[standard]>=0.30
gradio>=4.38
requests>=2.31

# vLLM + CUDA 12.1
vllm==0.6.4.post1
# Global option (applies to the whole install): consult the PyTorch cu121
# wheel index in addition to the default package index.
--extra-index-url https://download.pytorch.org/whl/cu121
torch==2.5.1
transformers>=4.44
accelerate>=0.30