|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Utilities for adapter testing.""" |
|
|
|
|
|
import multiprocessing |
|
|
|
|
|
import pytest |
|
|
from flask import Flask, jsonify, request |
|
|
|
|
|
from nemo.collections.llm.evaluation.adapters.utils import wait_for_server |
|
|
|
|
|
# Canned chat-completion payload served by the fake endpoint whenever a request
# does not carry its own "fake_response" override.
# NOTE(review): the "</think>" marker inside the content is presumably there so
# tests can check that text after a reasoning block is kept -- confirm against
# the tests that consume this payload.
DEFAULT_FAKE_RESPONSE = {
    "object": "chat.completion",
    "choices": [
        {
            "message": {
                "role": "assistant",
                "content": "This is a fake LLM response</think>This survives reasoning",
            }
        }
    ],
}
|
|
|
|
|
|
|
|
def create_and_run_fake_endpoint(host: str = "localhost", port: int = 3300) -> None:
    """Create and run a fake OpenAI API endpoint.

    Builds a Flask app exposing ``POST /v1/chat/completions`` and then blocks
    inside ``app.run``, so this is meant to be used as the target of a
    separate process.

    The handler echoes back the value of the ``"fake_response"`` key when the
    request's JSON body is an object containing it; otherwise it answers with
    ``DEFAULT_FAKE_RESPONSE``.

    Args:
        host: Interface the server binds to. Defaults to ``"localhost"``.
        port: TCP port the server listens on. Defaults to ``3300``.
    """
    app = Flask(__name__)

    @app.route("/v1/chat/completions", methods=["POST"])
    def chat_completion():
        data = request.json
        # Only a JSON object can carry the override key. Without the type
        # check, a body such as `null` or `42` would raise TypeError on the
        # membership test and surface as an HTTP 500 from the fake server.
        if isinstance(data, dict) and "fake_response" in data:
            response = data["fake_response"]
        else:
            response = DEFAULT_FAKE_RESPONSE
        return jsonify(response)

    # Blocks until the server is stopped.
    app.run(host=host, port=port)
|
|
|
|
|
|
|
|
def create_fake_endpoint_process():
    """Create a process running a fake OpenAI endpoint.

    Starts ``create_and_run_fake_endpoint`` in a child process and then checks
    ``wait_for_server("localhost", 3300)``. If that check returns a falsy
    value, the child is terminated and reaped and the current test is failed
    through ``pytest.fail``.

    Returns:
        The multiprocessing.Process object running the endpoint. The caller is
        responsible for stopping it.
    """
    p = multiprocessing.Process(target=create_and_run_fake_endpoint)
    p.start()

    if not wait_for_server("localhost", 3300):
        p.terminate()
        # terminate() only sends the signal; join so the child is actually
        # reaped instead of lingering as a zombie. The timeout keeps a stuck
        # child from hanging the test run.
        p.join(timeout=5)
        pytest.fail("Fake OpenAI endpoint did not start within the timeout period")

    return p
|
|
|