Spaces:

ThejasRao
/

ModPilot

Sleeping

ModPilot / llm /client.py

Deploy ModPilot Investigation Engine

7302343 about 2 months ago

1.66 kB

	"""LLM client protocol — provider-agnostic interface.

	Spec: docs/Specs.md §8.2, docs/06-AILayer.md §3.1.
	Today's only implementation is `engine/llm/gemini.py`.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	from enum import StrEnum
	from typing import TYPE_CHECKING, Literal, Protocol

	if TYPE_CHECKING:
	    # Needed only for annotations; `from __future__ import annotations`
	    # keeps them lazy, so this import never runs at runtime.
	    from pydantic import BaseModel


	class Role(StrEnum):
	"""LLM call role — drives model selection and prompt scaffolding."""

	REASONER = "reasoner"
	SUMMARIZER = "summarizer"


	@dataclass(frozen=True)
	class Message:
	    """One chat message; frozen, so instances are immutable and hashable."""

	    # Speaker of the message, restricted to the three literal values below.
	    role: Literal["system", "user", "assistant"]
	    # Text body of the message.
	    content: str


	@dataclass
	class LLMResponse:
	    """Outcome of a single LLM call.

	    `parsed` is populated when a response_schema was provided; otherwise it
	    keeps its default of None.
	    """

	    raw_text: str
	    input_tokens: int
	    output_tokens: int
	    model: str
	    latency_ms: int
	    cost_usd: float
	    parsed: BaseModel | None = None


	class LLMClient(Protocol):
	    """The contract every LLM provider implementation honours."""

	    async def complete(  # noqa: PLR0913 — keyword-only contract from Specs §8.2
	        self,
	        *,
	        role: Role,
	        messages: list[Message],
	        response_schema: type[BaseModel] | None = None,
	        max_tokens: int,
	        temperature: float = 0.0,
	        timeout_ms: int,
	        correlation_id: str,
	        # Gemini-2.5 internal "thinking" tokens count against max_tokens. Set 0
	        # to disable (Flash summarization, simple structured extraction). Leave
	        # None for the default behavior (Reasoner verdicts benefit from thinking).
	        thinking_budget: int | None = None,
	    ) -> LLMResponse:
	        """Run one LLM call and return its outcome.

	        Every argument after `self` is keyword-only.

	        Args:
	            role: Call role (see `Role`).
	            messages: The messages to send.
	            response_schema: Optional pydantic model class. `LLMResponse`
	                documents `parsed` as populated when a schema is provided.
	            max_tokens: Token limit for the call; Gemini-2.5 thinking tokens
	                count against it.
	            temperature: Sampling temperature; defaults to 0.0.
	            timeout_ms: Presumably the call's time budget in milliseconds —
	                TODO confirm against Specs §8.2.
	            correlation_id: Presumably an identifier for tracing the call —
	                TODO confirm against Specs §8.2.
	            thinking_budget: 0 disables thinking; None keeps the provider's
	                default behavior.

	        Returns:
	            An `LLMResponse` for the call.
	        """
	        ...