Spaces:
Runtime error
Runtime error
| from typing import ( | |
| Optional, | |
| List, | |
| Union, | |
| Dict, | |
| Iterator, | |
| Any, | |
| ) | |
| from llama_cpp import Llama | |
| from openai.types.chat import ( | |
| ChatCompletionMessage, | |
| ChatCompletion, | |
| ChatCompletionChunk, | |
| ) | |
| from openai.types.chat import ChatCompletionMessageParam | |
| from openai.types.chat.chat_completion import Choice | |
| from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice | |
| from openai.types.chat.chat_completion_chunk import ChoiceDelta | |
| from openai.types.completion_usage import CompletionUsage | |
| from api.adapter import get_prompt_adapter | |
| from api.utils.compat import model_parse | |
class LlamaCppEngine:
    """Wrap a llama.cpp model and expose OpenAI-style chat completion objects.

    The engine pairs a ``Llama`` model with a prompt adapter obtained from
    ``get_prompt_adapter`` and converts the model's raw text completions into
    ``ChatCompletion`` / ``ChatCompletionChunk`` objects.
    """

    def __init__(
        self,
        model: Llama,
        model_name: str,
        prompt_name: Optional[str] = None,
    ):
        """
        Initializes a LlamaCppEngine instance.

        Args:
            model (Llama): The Llama model to be used by the engine.
            model_name (str): The name of the model; stored lower-cased.
            prompt_name (Optional[str], optional): The name of the prompt; stored
                lower-cased when given. Defaults to None.
        """
        self.model = model
        self.model_name = model_name.lower()
        self.prompt_name = prompt_name.lower() if prompt_name is not None else None
        self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
        self,
        messages: List[ChatCompletionMessageParam],
        functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """
        Applies the prompt adapter's chat template to the given messages.

        When the adapter reports ``function_call_available``, the messages are
        first passed through the adapter's ``postprocess_messages`` together
        with ``functions`` and ``tools``.

        Args:
            messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional):
                Function definitions forwarded to the adapter. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): Tool definitions
                forwarded to the adapter. Defaults to None.

        Returns:
            str: The prompt produced by the adapter's chat template.
        """
        if self.prompt_adapter.function_call_available:
            messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
        return self.prompt_adapter.apply_chat_template(messages)

    def create_completion(self, prompt, **kwargs) -> Union[Iterator, Dict[str, Any]]:
        """
        Creates a completion using the specified prompt and additional keyword arguments.

        Args:
            prompt (str): The prompt for the completion.
            **kwargs: Additional keyword arguments passed unchanged to the
                model's ``create_completion`` method.

        Returns:
            Union[Iterator, Dict[str, Any]]: Whatever the model's
            ``create_completion`` returns.
        """
        return self.model.create_completion(prompt, **kwargs)

    def _create_chat_completion(self, prompt, **kwargs) -> ChatCompletion:
        """
        Creates a (non-streaming) chat completion for the given prompt.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments passed to ``create_completion``.

        Returns:
            ChatCompletion: A single-choice chat completion whose message content
            is the stripped text of the first completion choice.
        """
        completion = self.create_completion(prompt, **kwargs)
        first_choice = completion["choices"][0]

        message = ChatCompletionMessage(
            role="assistant",
            content=first_choice["text"].strip(),
        )
        choice = Choice(
            index=0,
            message=message,
            # Forward the backend's real finish reason (as the streaming path
            # already does) instead of always claiming "stop"; keep "stop" as
            # the fallback when the backend gives no reason.
            finish_reason=first_choice.get("finish_reason") or "stop",
            logprobs=None,
        )
        usage = model_parse(CompletionUsage, completion["usage"])

        return ChatCompletion(
            id="chat" + completion["id"],
            choices=[choice],
            created=completion["created"],
            model=completion["model"],
            object="chat.completion",
            usage=usage,
        )

    @staticmethod
    def _make_chunk(output, delta, finish_reason) -> ChatCompletionChunk:
        """Build one chat chunk carrying ``delta``, reusing the id/created/model of ``output``."""
        choice = ChunkChoice(
            index=0,
            delta=delta,
            finish_reason=finish_reason,
            logprobs=None,
        )
        return ChatCompletionChunk(
            id=f"chat{output['id']}",
            choices=[choice],
            created=output["created"],
            model=output["model"],
            object="chat.completion.chunk",
        )

    def _create_chat_completion_stream(self, prompt, **kwargs) -> Iterator:
        """
        Generates a stream of chat completion chunks based on the given prompt.

        Before the first model output is converted, an extra chunk announcing
        the ``assistant`` role with empty content is emitted. Each model output
        then becomes one chunk: its text is used as the delta content while
        ``finish_reason`` is None; once a finish reason is present the delta is
        left empty and only the finish reason is reported.

        Args:
            prompt (str): The prompt for generating chat completion chunks.
            **kwargs: Additional keyword arguments for creating completions.

        Yields:
            ChatCompletionChunk: A chunk of chat completion generated from the prompt.
        """
        for position, output in enumerate(self.create_completion(prompt, **kwargs)):
            if position == 0:
                # Opening chunk: role only, no text yet.
                yield self._make_chunk(
                    output,
                    ChoiceDelta(role="assistant", content=""),
                    None,
                )

            finish_reason = output["choices"][0]["finish_reason"]
            if finish_reason is None:
                delta = ChoiceDelta(content=output["choices"][0]["text"])
            else:
                delta = ChoiceDelta()
            yield self._make_chunk(output, delta, finish_reason)

    def create_chat_completion(self, prompt, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion, streaming or not depending on ``stream``.

        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments for creating completions; a
                truthy ``stream`` entry selects the streaming variant.

        Returns:
            Union[Iterator, ChatCompletion]: An iterator of chunks when
            streaming, otherwise a single ChatCompletion.
        """
        if kwargs.get("stream", False):
            return self._create_chat_completion_stream(prompt, **kwargs)
        return self._create_chat_completion(prompt, **kwargs)

    def stop(self):
        """
        Gets the ``stop`` attribute of the prompt adapter.

        Returns:
            The ``stop`` attribute of the prompt adapter, or None if it does not exist.
        """
        return getattr(self.prompt_adapter, "stop", None)