Spaces:
Build error
Build error
"""Static configuration for the RAG chatbot: model location, paths, and sampling/retrieval parameters."""

import os

import torch

# --- Model location -------------------------------------------------------
MODEL_NAME = "saiga_mistral_7b.Q4_K_M.gguf"
# Use the "resolve" endpoint: "blob" serves the HTML file viewer, not the raw file.
MODEL_URL = f"https://huggingface.co/TheBloke/saiga_mistral_7b-GGUF/resolve/main/{MODEL_NAME}"

# --- Filesystem layout (FOR PRODUCTION) -----------------------------------
# Every path is anchored at the directory that contains this file.
CWD = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(CWD, "data")
DOCS_PATH = os.path.join(DATA_PATH, "docs")
MODEL_PATH = os.path.join(CWD, "model")
MODEL_SAVE_PATH = os.path.join(MODEL_PATH, MODEL_NAME)

# --- RAG params -----------------------------------------------------------
# The number of layers to put on the GPU. The rest will be on the CPU
# (0 means all layers on the CPU). -1 is used here when CUDA is available.
N_GPU_LAYERS = -1 if torch.cuda.is_available() else 0
N_BATCH = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU
TEMPERATURE = 0.1  # The temperature of the sampling. 0.1 is a good value for most cases
MAX_TOKENS = 1024  # The maximum number of tokens to generate
# Nucleus-sampling threshold; a probability mass, so it must lie in (0, 1].
# NOTE(review): this used to be 2, which is out of range. 1.0 should keep nucleus
# sampling disabled (presumably how the backend treated 2) - confirm the intended value.
TOP_P = 1.0
N_CTX = 2048  # context len, up to a maximum of 32k
CHUNK_SIZE = 750  # max number of letters for each chunk during splitting
CHUNK_OVERLAP = 200  # overlap between chunks
SEARCH_TYPE = "mmr"
LAST_MESSAGES = 3  # The number of last messages in conversation history to include in the context
REPEAT_PENALTY = 1.1  # The penalty for repeating tokens in the output
# N_GPU_LAYERS is -1 (not a positive count) when the GPU is used, so compare
# against 0 rather than testing "> 0", which could never select "cuda".
DEVICE = "cuda" if N_GPU_LAYERS != 0 else "cpu"
EMBED_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
VECTOR_STORE_PATH = os.path.join(DATA_PATH, "chroma_db")

# --- Retriever config -----------------------------------------------------
# NOTE(review): "score_threshold" is normally consumed by a score-threshold
# search type; verify it has any effect together with SEARCH_TYPE = "mmr".
SEARCH_KWARGS = {"k": 3, "score_threshold": 0.6}

# --- Prompt templates -----------------------------------------------------
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>"
DEFAULT_RESPONSE_TEMPLATE = "<s>bot\n"
# Adjacent literals are joined at compile time into one single-line prompt;
# each fragment keeps its trailing space so sentences stay separated.
DEFAULT_SYSTEM_PROMPT = (
    "Ты ассистент помощник, который отвечает на вопросы используя предоставленный контекст. "
    "В качестве контекста используются тексты из различных источников. "
    "Постарайся ответить на вопрос максимально точно. "
    "Для ответа используй только информацию из контекста и вопроса. Ничего не выдумывай. "
    "Если не можешь ответить на вопрос, напиши - 'Не хватает данных для ответа.' "
)