import os
from pathlib import Path

import pytest
import requests
from dotenv import load_dotenv

from pluto.dispatcher import _resolve_nvidia_api_key

# Load the ".env" file located next to this test module before any test runs.
load_dotenv(Path(__file__).resolve().with_name(".env"))


def test_nvidia_key_resolution_prefers_model_specific(monkeypatch):
    """Check that the model-specific key wins over the global NVIDIA key.

    With both ``NVIDIA_API_KEY`` and ``NVIDIA_API_KEY_SUPER`` set, resolving
    the key for the Nemotron "super" model must report the ``_SUPER``
    variable name together with its value.
    """
    monkeypatch.setenv("NVIDIA_API_KEY", "global")
    monkeypatch.setenv("NVIDIA_API_KEY_SUPER", "super")

    resolved_name, resolved_key = _resolve_nvidia_api_key(
        "nvidia/llama-3.3-nemotron-super-49b-v1"
    )

    assert resolved_name == "NVIDIA_API_KEY_SUPER"
    assert resolved_key == "super"


@pytest.mark.live_api
def test_nvidia_rerank_endpoint():
    """POST a minimal rerank request to the live endpoint and expect HTTP 200.

    The test is skipped when ``NVIDIA_API_KEY_RERANK`` is unset or empty.
    On failure the response body is used as the assertion message.
    """
    token = os.getenv("NVIDIA_API_KEY_RERANK")
    if not token:
        pytest.skip("NVIDIA_API_KEY_RERANK not configured")

    endpoint = (
        "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-nemotron-rerank-1b-v2/reranking"
    )
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    # Smallest useful payload: one query and a single candidate passage.
    payload = {
        "model": "nvidia/llama-nemotron-rerank-1b-v2",
        "query": {"text": "Hi"},
        "passages": [{"text": "Hello"}],
    }

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=payload,
        timeout=20,
    )

    assert reply.status_code == 200, reply.text