|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Any, Generator |
|
|
|
|
|
import pytest |
|
|
import requests |
|
|
|
|
|
from nemo.collections.llm.evaluation.adapters.server import create_server_process |
|
|
from nemo.collections.llm.evaluation.api import AdapterConfig |
|
|
|
|
|
|
|
|
@pytest.fixture
def adapter_server(fake_openai_endpoint) -> Generator[AdapterConfig, Any, Any]:
    """Start an adapter server process wrapping the fake OpenAI endpoint.

    Yields the (possibly updated) ``AdapterConfig`` returned by
    ``create_server_process`` — it carries the local port the adapter is
    listening on. The server process is always terminated on teardown,
    even if an exception is thrown into the fixture at the yield point.
    """
    adapter_config = AdapterConfig(
        api_url="http://localhost:3300/v1/chat/completions",
        use_reasoning=True,
        end_reasoning_token="</think>",
        max_logged_responses=1,
        max_logged_requests=1,
    )

    p, adapter_config = create_server_process(adapter_config)
    try:
        yield adapter_config
    finally:
        # Ensure the server process does not leak even on test error/interrupt,
        # and reap it so it does not linger as a zombie.
        p.terminate()
        p.join()
|
|
|
|
|
|
|
|
def test_adapter_server_post_request(adapter_server, capfd):
    """POST the same completion request twice through the adapter.

    Both responses must be successful and contain a non-empty ``choices``
    list; the final response content must have the reasoning section
    (everything up to and including ``</think>``) stripped out.
    """
    endpoint = f"http://localhost:{adapter_server.local_port}"
    payload = {
        "prompt": "This is a test prompt",
        "max_tokens": 100,
        "temperature": 0.5,
    }

    # Send the request twice; each round trip must succeed identically.
    for _ in range(2):
        response = requests.post(endpoint, json=payload)
        body = response.json()
        assert response.status_code == 200
        assert "choices" in body
        assert len(body["choices"]) > 0

    # The adapter should have removed the reasoning block from the content.
    assert "</think>" not in body["choices"][0]["message"]["content"]
|
|
|