Spaces:
Running
Running
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55 | """LLM backend protocol and factory. | |
| The in-process ``LlamaCppBackend`` implements this protocol. Everything above this layer | |
| depends only on the protocol, so the engine never reaches into the runtime directly. | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import Iterator | |
| from dataclasses import dataclass | |
| from typing import Protocol, runtime_checkable | |
| from ..config import Settings | |
# Plain (non-frozen) dataclass: gives keyword construction, ``__repr__`` and ``__eq__``
# without forbidding attribute assignment that existing callers may rely on.
@dataclass
class GenParams:
    """Sampling parameters for one call.

    Provide at most one constraint: ``grammar`` (GBNF text) or ``json_schema``
    (a JSON-Schema dict the backend converts to a grammar). Leave both unset for the
    fast, grammar-free path.

    Every field has a default, so ``GenParams()`` is valid and callers override only
    what they need, e.g. ``GenParams(max_tokens=64, stop=("\\n",))``.
    """

    # Optional output constraints (see the class docstring: use at most one).
    grammar: str | None = None
    json_schema: dict | None = None

    # Core sampling controls.
    max_tokens: int = 512
    temperature: float = 0.7
    top_p: float = 0.95
    # Stop sequences; a tuple so the default is immutable and safely shared.
    stop: tuple[str, ...] = ()
    # ``None`` leaves the choice of seed to the backend.
    seed: int | None = None

    # Anti-repetition. Defaults match llama.cpp's own defaults so generation behaviour is
    # unchanged; the interrogation hot path raises these to stop a small model from copying
    # its previous answer verbatim across turns.
    repeat_penalty: float = 1.1
    frequency_penalty: float = 0.0
    presence_penalty: float = 0.0
class LLMError(RuntimeError):
    """Signal a backend failure that cannot be recovered from.

    Subclasses ``RuntimeError``, so existing ``except RuntimeError`` handlers
    also catch it.
    """
@runtime_checkable
class LLMBackend(Protocol):
    """A minimal text-in / text-out interface with optional grammar constraint.

    The protocol is structural: any object providing ``generate`` and ``stream``
    satisfies it without inheriting from this class. Because it is
    ``runtime_checkable``, ``isinstance(obj, LLMBackend)`` is allowed; that check
    only verifies the two methods exist, not their signatures.
    """

    def generate(self, prompt: str, params: GenParams) -> str:
        """Return the full completion as a single string."""
        ...

    def stream(self, prompt: str, params: GenParams) -> Iterator[str]:
        """Yield completion text deltas as they are produced."""
        ...
def make_backend(settings: Settings) -> LLMBackend:
    """Build the in-process llama.cpp backend from ``settings``.

    The backend module is imported inside the function rather than at module top,
    so the heavy native dependency is only loaded when a backend is actually built.
    """
    from .llamacpp_backend import LlamaCppBackend as backend_cls

    backend = backend_cls.from_settings(settings)
    return backend
def join_stream(deltas: Iterator[str]) -> str:
    """Drain ``deltas`` and return the pieces concatenated in order.

    An iterator that yields nothing produces the empty string.
    """
    no_separator = ""
    return no_separator.join(deltas)