| from abc import ABC, abstractmethod | |
| from dataclasses import dataclass | |
| from typing import Any, Optional | |
| class LLMInferenceOutput: | |
| content: str | |
| reasoning_content: str | None = None | |
| log_probs: list[float] | None = None | |
| out_token_ids: list[int] | None = None | |
class LLMInferenceBackend(ABC):
    """Abstract interface that every LLM inference backend must implement.

    The base class provides no behavior of its own. All operational methods
    are abstract, so a subclass that omits one fails at instantiation time
    instead of silently inheriting a no-op.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword options; the base class ignores them.

        Kept concrete so subclasses may call ``super().__init__(**kwargs)``.
        """
        ...

    @abstractmethod
    def prepare_adapter(
        self, adapter_id: str, weights_got_updated: bool = False
    ) -> None:
        """Ensure adapter is ready/loaded for next generation call.

        Args:
            adapter_id: Identifier of the adapter to prepare.
            weights_got_updated: NOTE(review): presumably tells the backend
                that the adapter's weights changed since it was last
                prepared -- confirm with the implementations.
        """

    @abstractmethod
    async def generate(self, prompt: list[dict], regex: Optional[str] = None) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: List of dicts describing the prompt.
                NOTE(review): presumably chat-style messages -- the exact
                schema is defined by the implementations; confirm.
            regex: Optional pattern.
                NOTE(review): presumably constrains the generated text --
                confirm with the implementations.

        Returns:
            The generated text, per the declared ``str`` return annotation.
        """
        ...

    @abstractmethod
    def toggle_training_mode(self) -> None:
        """Switch the backend to training mode (semantics are backend-defined)."""
        ...

    @abstractmethod
    def toggle_eval_mode(self) -> None:
        """Switch the backend to evaluation mode (semantics are backend-defined)."""
        ...

    @abstractmethod
    def shutdown(self) -> None:
        """Shut the backend down (semantics are backend-defined)."""
        ...