Spaces:
Runtime error
Runtime error
| import logging | |
| from llama_index.llms import LLM, MockLLM | |
| from app._config import settings | |
| from app.enums import LLMMode | |
| from app.paths import models_path | |
| logger = logging.getLogger(__name__) | |
| class LLMComponent: | |
| llm: LLM | |
| def __init__(self) -> None: | |
| llm_mode = settings.LLM_MODE | |
| logger.info(f"Initializing the LLM in mode={llm_mode}") | |
| match settings.LLM_MODE: | |
| case LLMMode.OPENAI: | |
| from llama_index.llms import OpenAI | |
| self.llm = OpenAI( | |
| api_key=settings.OPENAI_API_KEY, | |
| model=settings.OPENAI_MODEL, | |
| ) | |
| case LLMMode.MOCK: | |
| self.llm = MockLLM() | |
| case LLMMode.LOCAL: | |
| from llama_index.llms import LlamaCPP | |
| from llama_index.llms.llama_utils import ( | |
| completion_to_prompt, | |
| messages_to_prompt, | |
| ) | |
| self.llm = LlamaCPP( | |
| model_path=str(models_path / settings.LOCAL_HF_LLM_MODEL_FILE), | |
| temperature=settings.LLM_TEMPERATURE, | |
| max_new_tokens=settings.LLM_MAX_NEW_TOKENS, | |
| context_window=settings.LLM_CONTEXT_WINDOW, | |
| generate_kwargs={}, | |
| # set to at least 1 to use GPU | |
| # set to -1 for all gpu | |
| # set to 0 for cpu | |
| model_kwargs={"n_gpu_layers": 0}, | |
| # transform inputs into Llama2 format | |
| messages_to_prompt=messages_to_prompt, | |
| completion_to_prompt=completion_to_prompt, | |
| verbose=True, | |
| ) | |