File size: 11,098 Bytes
1006a3a
 
 
 
 
 
 
 
 
7b72c0a
 
1006a3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b72c0a
868442e
49badf7
1006a3a
 
49badf7
1006a3a
 
 
 
 
b08494f
1006a3a
 
7b72c0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006a3a
 
 
 
 
 
 
 
 
b08494f
1006a3a
 
 
 
 
 
 
 
 
 
 
868442e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698373a
 
 
 
 
 
 
 
 
 
 
1006a3a
 
 
 
 
 
 
 
 
698373a
 
1006a3a
 
49badf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006a3a
 
 
 
 
868442e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d89a81
868442e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
"""Integration tests for OpenAI-compatible endpoint logic."""
from __future__ import annotations

import sys
import types
from pathlib import Path
import asyncio

import pytest
from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient
import pydantic

# Empty stand-in module object for ``pydantic_settings``; its attributes are
# assigned and it is registered in ``sys.modules`` further down this file.
fake_pydantic_settings = types.ModuleType("pydantic_settings")


class _FakeBaseSettings(pydantic.BaseModel):
    """Minimal ``BaseSettings`` substitute built directly on ``pydantic.BaseModel``.

    This file assigns it to ``fake_pydantic_settings.BaseSettings``.
    """

    # Empty configuration mapping.
    # NOTE(review): presumably present so subclasses can override
    # ``model_config`` as they would on the real BaseSettings - confirm.
    model_config: dict = {}

    def model_dump(self, *args, **kwargs):  # pragma: no cover - passthrough
        """Forward all arguments unchanged to ``pydantic.BaseModel.model_dump``."""
        return super().model_dump(*args, **kwargs)


# Expose the substitutes on the fake module, then register it. ``setdefault``
# leaves any ``pydantic_settings`` entry already in ``sys.modules`` untouched.
fake_pydantic_settings.BaseSettings = _FakeBaseSettings
fake_pydantic_settings.SettingsConfigDict = dict
sys.modules.setdefault("pydantic_settings", fake_pydantic_settings)

# Stand-in ``torch`` module: both the CUDA and the MPS availability probes
# always report False.
fake_torch = types.ModuleType("torch")
fake_torch.cuda = types.SimpleNamespace(is_available=lambda: False)
fake_torch.backends = types.SimpleNamespace(
    mps=types.SimpleNamespace(is_available=lambda: False)
)


class _NoGrad:
    """Context manager that does nothing on entry or exit."""

    def __enter__(self) -> None:
        """Enter the block; ``with ... as x`` binds ``None``."""
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:
        """Leave the block without suppressing any exception raised inside it."""
        suppress = False
        return suppress


def _no_grad() -> _NoGrad:
    """Create and return a new ``_NoGrad`` context manager."""
    manager = _NoGrad()
    return manager


def _dummy_to(self, *args, **kwargs):  # noqa: D401
    """Ignore every extra argument and return the first argument unchanged."""
    receiver = self
    return receiver


# Complete the fake ``torch`` surface: a ``no_grad`` factory plus a ``Tensor``
# type whose ``to``/``unsqueeze`` return the instance itself and whose ``dim``
# always returns 2.
fake_torch.no_grad = _no_grad
fake_torch.Tensor = type("Tensor", (), {"to": _dummy_to, "dim": lambda self: 2, "unsqueeze": lambda self, _: self})
# Registered only when ``sys.modules`` has no ``torch`` entry yet.
sys.modules.setdefault("torch", fake_torch)

# Empty stand-in ``transformers`` module; its attributes are assigned after the
# dummy classes below are defined.
fake_transformers = types.ModuleType("transformers")


class _DummyTokenizer:
    """Lightweight tokenizer substitute producing placeholder encodings.

    Calling an instance returns a dict with ``input_ids`` and
    ``attention_mask`` entries that both refer to the same tensor-like
    placeholder object.
    """

    pad_token_id = 0
    eos_token = ""

    class _Encoding:
        """Tensor-like placeholder exposing ``shape``, ``to`` and indexing."""

        def __init__(self, length: int) -> None:
            self.shape = (1, length)

        def __getitem__(self, key):
            # Must live on the class: Python resolves ``obj[key]`` through the
            # type, so a ``__getitem__`` stored on an instance is never used.
            return self

        def to(self, *args, **kwargs):
            # A real bound method, so ``tensor.to(device)`` returns the tensor
            # itself rather than the device argument.
            return self

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Return a new tokenizer; every argument is ignored."""
        return cls()

    def __call__(self, prompt: str, return_tensors: str = "pt") -> dict:
        """Encode ``prompt`` as a placeholder of shape ``(1, max(len(prompt), 1))``.

        ``return_tensors`` is accepted for signature compatibility and ignored.
        """
        tensor = self._Encoding(max(len(prompt), 1))
        return {"input_ids": tensor, "attention_mask": tensor}

    def encode(self, text: str, add_special_tokens: bool = False) -> list[int]:
        """Return one ``0`` id per character of ``text`` (at least one id)."""
        return [0] * max(len(text), 1)

    def decode(self, token_ids, skip_special_tokens: bool = True) -> str:
        """Concatenate the string form of each item when ``token_ids`` is a list.

        Any non-list input yields ``""``. Items are passed through ``str`` so
        the integer ids produced by ``encode`` do not raise ``TypeError``.
        ``skip_special_tokens`` is ignored.
        """
        if not isinstance(token_ids, list):
            return ""
        return "".join(str(token) for token in token_ids)


class _DummyModel:
    """Model substitute whose methods perform no real work."""

    @classmethod
    def from_pretrained(cls, *args, **kwargs):  # noqa: D401
        """Build an instance; every argument is ignored."""
        instance = cls()
        return instance

    def to(self, device):  # noqa: D401
        """Return the model itself regardless of ``device``."""
        return self

    def generate(self, *args, **kwargs):  # noqa: D401
        """Return a placeholder whose ``dim()`` is 2 and whose ``unsqueeze(x)`` is ``None``."""

        def _dim():
            return 2

        def _unsqueeze(_):
            return None

        return types.SimpleNamespace(dim=_dim, unsqueeze=_unsqueeze)


class _DummyGenerationConfig:
    """Generation-config substitute that only records its keyword arguments."""

    def __init__(self, **kwargs) -> None:
        """Store the received keyword arguments on ``self.kwargs``."""
        options = kwargs
        self.kwargs = options


class _DummyStreamer:
    """Streamer substitute that never yields any item."""

    def __init__(self, *args, **kwargs) -> None:
        """Accept and discard any constructor arguments."""

    def __iter__(self):
        """Return an iterator that is exhausted from the start."""
        return iter(())


# Populate the fake ``transformers`` module with the dummy classes defined
# above and register it unless ``sys.modules`` already has a ``transformers``
# entry.
fake_transformers.AutoTokenizer = _DummyTokenizer
fake_transformers.AutoModelForCausalLM = _DummyModel
fake_transformers.GenerationConfig = _DummyGenerationConfig
fake_transformers.TextIteratorStreamer = _DummyStreamer
fake_transformers.PreTrainedTokenizerBase = object
sys.modules.setdefault("transformers", fake_transformers)

# Stand-in ``yaml`` module whose ``safe_load`` ignores its input and always
# returns an empty list.
fake_yaml = types.ModuleType("yaml")
fake_yaml.safe_load = lambda data: []
sys.modules.setdefault("yaml", fake_yaml)

# Append the parent of this file's directory to ``sys.path``.
# NOTE(review): presumably so the ``app`` package imports below resolve - confirm.
sys.path.append(str(Path(__file__).resolve().parents[1]))

from app.core import model_registry as model_registry_module
from app.core.model_registry import ModelMetadata, ModelSpec
from app.routers import chat, completions, embeddings, models, responses
from app.schemas.chat import ChatCompletionRequest
from app.schemas.completions import CompletionRequest
from app.schemas.responses import ResponseRequest


def test_list_models() -> None:
    """The model listing is a ``list`` object whose ``data`` array is empty."""
    listing = models.list_available_models()

    assert listing["object"] == "list"
    assert listing["data"] == []


def test_models_endpoint_returns_default_payload(monkeypatch: pytest.MonkeyPatch) -> None:
    """``GET /v1/models`` returns a non-empty list that includes ``GPT3-dev-350m-2805``."""

    class DummySettings:
        """Settings stub: no allow list, no registry path, default models enabled."""

        def __init__(self) -> None:
            self.model_allow_list = None
            self.include_default_models = True
            self.model_registry_path = None

        def model_dump(self) -> dict:
            dumped = {
                "model_registry_path": self.model_registry_path,
                "include_default_models": self.include_default_models,
            }
            return dumped

    def fake_get_settings() -> DummySettings:
        return DummySettings()

    app = FastAPI()
    app.include_router(models.router)
    monkeypatch.setattr(
        model_registry_module, "get_settings", fake_get_settings, raising=False
    )

    with TestClient(app) as client:
        # Start from an empty registry and always leave it empty afterwards.
        model_registry_module._registry.clear()
        try:
            response = client.get("/v1/models")
            assert response.status_code == 200

            body = response.json()
            assert body["object"] == "list"

            entries = body["data"]
            assert entries, "Default models should be present"

            model_ids = {entry["id"] for entry in entries}
            assert "GPT3-dev-350m-2805" in model_ids
        finally:
            model_registry_module._registry.clear()


def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
    """A single-prompt completion reports the model, the text and 7 total tokens.

    The stubbed engine result carries 5 prompt tokens and one 2-token choice.
    """

    class DummyChoice:
        text = "Hello"
        tokens = 2
        finish_reason = "stop"

    class DummyResult:
        prompt_tokens = 5
        completions = [DummyChoice()]

    monkeypatch.setattr(
        "app.routers.completions.engine.generate",
        lambda *args, **kwargs: DummyResult(),
    )
    monkeypatch.setattr("app.routers.completions.get_model_spec", lambda model: None)

    request = CompletionRequest.model_validate({"model": "GPT3-dev", "prompt": "Hello"})
    result = asyncio.run(completions.create_completion(request))
    body = result.model_dump()

    assert body["model"] == "GPT3-dev"
    assert body["choices"][0]["text"] == "Hello"
    assert body["usage"]["total_tokens"] == 7


def test_completions_handles_prompt_list(monkeypatch: pytest.MonkeyPatch) -> None:
    """A list ``prompt`` triggers one generate call per entry, in order.

    The response must contain one choice per prompt and the prompt-token usage
    must equal the sum over both stubbed results.
    """
    seen_prompts: list[str] = []

    class DummyItem:
        def __init__(self, text: str) -> None:
            self.text = text
            self.tokens = 1
            self.finish_reason = "stop"

    class DummyResult:
        def __init__(self, prompt: str) -> None:
            # The stub reports the character count as the prompt token count.
            self.prompt_tokens = len(prompt)
            self.completions = [DummyItem(f"{prompt}-out")]

    def fake_generate(model: str, prompt: str, **_: object) -> DummyResult:
        seen_prompts.append(prompt)
        return DummyResult(prompt)

    monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
    monkeypatch.setattr("app.routers.completions.get_model_spec", lambda model: None)

    request = CompletionRequest.model_validate(
        {"model": "GPT3-dev", "prompt": ["Hello", "World"]}
    )
    body = asyncio.run(completions.create_completion(request)).model_dump()

    assert seen_prompts == ["Hello", "World"]
    texts = [choice["text"] for choice in body["choices"]]
    assert texts == ["Hello-out", "World-out"]
    assert body["usage"]["prompt_tokens"] == len("Hello") + len("World")


def test_chat_rejects_non_instruct_model(monkeypatch: pytest.MonkeyPatch) -> None:
    """A chat completion against a non-instruct model fails with HTTP 400."""
    # Swap the registry for one holding a single model flagged as non-instruct.
    non_instruct = ModelSpec(
        name="GPT3-dev", hf_repo="k050506koch/GPT3-dev", is_instruct=False
    )
    monkeypatch.setattr(model_registry_module, "_registry", {"GPT3-dev": non_instruct})

    request = ChatCompletionRequest.model_validate(
        {
            "model": "GPT3-dev",
            "messages": [{"role": "user", "content": "Hi"}],
        }
    )

    with pytest.raises(HTTPException) as caught:
        asyncio.run(chat.create_chat_completion(request))

    error = caught.value
    assert error.status_code == 400
    assert "not an instruct model" in error.detail["message"]


def test_responses_string_input(monkeypatch: pytest.MonkeyPatch) -> None:
    """A plain-string ``input`` yields one assistant message and the stubbed usage."""

    class DummyChoice:
        text = "Hello"
        tokens = 2
        finish_reason = "stop"

    class DummyResult:
        prompt_tokens = 4
        completions = [DummyChoice()]

    def fake_get_model_spec(model):
        return ModelSpec(name=model, hf_repo="dummy/repo", is_instruct=False)

    monkeypatch.setattr(
        "app.routers.responses.engine.generate",
        lambda *args, **kwargs: DummyResult(),
    )
    monkeypatch.setattr("app.routers.responses.get_model_spec", fake_get_model_spec)

    request = ResponseRequest.model_validate({"model": "GPT3-dev", "input": "Hi"})
    body = asyncio.run(responses.create_response(request)).model_dump()

    assert body["object"] == "response"
    message = body["output"][0]
    assert message["role"] == "assistant"
    assert message["content"][0]["text"] == "Hello"
    usage = body["usage"]
    assert usage["input_tokens"] == 4
    assert usage["output_tokens"] == 2


def test_responses_instruct_messages(monkeypatch: pytest.MonkeyPatch) -> None:
    """For an instruct model the templated prompt is what reaches ``generate``.

    ``apply_chat_template`` is stubbed to return a fixed string, and the test
    checks that this string is the second positional argument of the generate
    call.
    """

    class DummyChoice:
        text = "Sure"
        tokens = 1
        finish_reason = "stop"

    class DummyResult:
        prompt_tokens = 3
        completions = [DummyChoice()]

    prompts_seen: list[str] = []

    def fake_generate(*args, **kwargs):
        # The prompt is read positionally, right after the model argument.
        prompts_seen.append(args[1])
        return DummyResult()

    def fake_get_model_spec(model):
        return ModelSpec(name=model, hf_repo="dummy/instruct", is_instruct=True)

    monkeypatch.setattr("app.routers.responses.engine.generate", fake_generate)
    monkeypatch.setattr(
        "app.routers.responses.engine.apply_chat_template",
        lambda model, messages: "formatted prompt",
    )
    monkeypatch.setattr("app.routers.responses.get_model_spec", fake_get_model_spec)

    request = ResponseRequest.model_validate(
        {
            "model": "GPT4-dev-177M-1511-Instruct",
            "input": [{"role": "user", "content": "Hi"}],
        }
    )
    body = asyncio.run(responses.create_response(request)).model_dump()

    assert prompts_seen == ["formatted prompt"]
    assert body["output"][0]["content"][0]["text"] == "Sure"
    assert body["usage"]["total_tokens"] == 4


def test_openai_client_responses_create(monkeypatch: pytest.MonkeyPatch) -> None:
    """Placeholder that is always skipped.

    The test is skipped when ``openai`` cannot be imported and, otherwise,
    skipped unconditionally with a message stating that the scenario moved to
    live API coverage. The module is no longer dereferenced, so an ``openai``
    package without an ``OpenAI`` attribute can no longer turn the skip into an
    ``AttributeError``.
    """
    pytest.importorskip("openai")
    pytest.skip("OpenAI client test moved to live API coverage.")


def test_embeddings_not_implemented() -> None:
    """The embeddings handler raises HTTP 501 with the backend-unavailable code."""
    with pytest.raises(HTTPException) as caught:
        asyncio.run(embeddings.create_embeddings())

    error = caught.value
    assert error.status_code == 501
    assert error.detail["code"] == "embeddings_backend_unavailable"


def test_model_detail_serialization(monkeypatch: pytest.MonkeyPatch) -> None:
    """Listing entries omit ``metadata``; the detail view exposes the HF repo in it."""
    spec = ModelSpec(
        name="example",
        hf_repo="example/repo",
        metadata=ModelMetadata(description="Example", parameter_count="1M"),
        dtype="float16",
        device="cpu",
        max_context_tokens=1024,
    )

    def fake_list_models():
        return [spec]

    def fake_get_model_spec(_):
        return spec

    monkeypatch.setattr(models, "list_models", fake_list_models)
    monkeypatch.setattr(models, "get_model_spec", fake_get_model_spec)

    first_entry = models.list_available_models()["data"][0]
    assert "metadata" not in first_entry

    detail = models.retrieve_model("example")
    assert detail["metadata"]["huggingface_repo"] == "example/repo"