from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional
@dataclass
class LLMInferenceOutput:
content: str
reasoning_content: str | None = None
log_probs: list[float] | None = None
out_token_ids: list[int] | None = None
class LLMInferenceBackend(ABC):
@abstractmethod
def __init__(self, **kwargs):
...
@abstractmethod
def prepare_adapter(
self, adapter_id: str, weights_got_updated: bool = False
) -> None:
"""Ensure adapter is ready/loaded for next generation call."""
@abstractmethod
async def generate(self, prompt: list[dict], regex: Optional[str] = None) -> str:
...
@abstractmethod
def toggle_training_mode(self) -> None:
...
@abstractmethod
def toggle_eval_mode(self) -> None:
...
@abstractmethod
def shutdown(self) -> None:
...
|