ModPilot / llm /client.py
ThejasRao's picture
Deploy ModPilot Investigation Engine
7302343
Raw
History Blame Contribute Delete
1.66 kB
"""LLM client protocol — provider-agnostic interface.
Spec: docs/Specs.md §8.2, docs/06-AILayer.md §3.1.
Today's only implementation is `engine/llm/gemini.py`.
"""
from __future__ import annotations
from dataclasses import dataclass
from enum import StrEnum
from typing import TYPE_CHECKING, Literal, Protocol
if TYPE_CHECKING:
from pydantic import BaseModel
class Role(StrEnum):
"""LLM call role — drives model selection and prompt scaffolding."""
REASONER = "reasoner"
SUMMARIZER = "summarizer"
@dataclass(frozen=True)
class Message:
    """One immutable chat message: who is speaking and what was said."""

    # Conversation side this message belongs to; restricted to these three
    # literal values for type checkers.
    role: Literal["system", "user", "assistant"]
    # Text body of the message.
    content: str
@dataclass
class LLMResponse:
    """Result of one LLM call.

    `parsed` is populated when a response_schema was provided; it defaults
    to ``None``.
    """

    # Raw text of the response.
    raw_text: str
    # Token counts recorded for the call (input side / output side).
    input_tokens: int
    output_tokens: int
    # Name of the model — presumably the one that served the call; confirm
    # against the provider implementation.
    model: str
    # Call latency; milliseconds, going by the field name.
    latency_ms: int
    # Cost of the call; US dollars, going by the field name.
    cost_usd: float
    # Schema-validated form of the response, when a schema was requested.
    parsed: BaseModel | None = None
class LLMClient(Protocol):
    """Structural interface that every LLM provider implementation satisfies."""

    async def complete(  # noqa: PLR0913 — keyword-only contract from Specs §8.2
        self,
        *,
        role: Role,
        messages: list[Message],
        response_schema: type[BaseModel] | None = None,
        max_tokens: int,
        temperature: float = 0.0,
        timeout_ms: int,
        correlation_id: str,
        thinking_budget: int | None = None,
    ) -> LLMResponse:
        """Perform a single LLM call and return its outcome.

        Every argument is keyword-only.

        Args:
            role: Call role; drives model selection and prompt scaffolding.
            messages: The messages to send.
            response_schema: Optional pydantic model class. When provided,
                the response's `parsed` field is populated.
            max_tokens: Token limit for the call. Note that Gemini-2.5
                internal "thinking" tokens count against it.
            temperature: Sampling temperature; defaults to 0.0.
            timeout_ms: Timeout for the call; milliseconds, going by the
                name.
            correlation_id: Identifier for the call — presumably used to
                correlate logs/traces; confirm against the implementation.
            thinking_budget: Budget for Gemini-2.5 "thinking" tokens. Set 0
                to disable (Flash summarization, simple structured
                extraction). Leave None for the default behavior (Reasoner
                verdicts benefit from thinking).

        Returns:
            The LLMResponse describing the call.
        """
        ...