Spaces · Build error

Commit 61d38da · Daniel Marques committed · Parent(s): 415fe69

feat: add ministral model

Files changed:
- constants.py (+5 -5)
- prompt_template_utils.py (+4 -4)
constants.py CHANGED

@@ -32,7 +32,7 @@ CHROMA_SETTINGS = Settings(
 )
 
 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE =
+CONTEXT_WINDOW_SIZE = 3000
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE/4)
 
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
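The window is set to 3000 tokens, and MAX_NEW_TOKENS stays pinned to the full window rather than the quarter suggested by the inline comment, so a long prompt plus a long completion can exceed the budget. For context, a minimal sketch of how these two constants are typically consumed, assuming a llama-cpp-python backend through LangChain's LlamaCpp wrapper; the loader itself is not part of this diff, and model_path is a hypothetical local path:

# Sketch only: assumes LangChain's LlamaCpp wrapper; the repo's actual
# loader is not shown in this commit.
from langchain.llms import LlamaCpp

CONTEXT_WINDOW_SIZE = 3000
MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE / 4) would instead reserve 3/4 of the window for the prompt

llm = LlamaCpp(
    model_path="models/mistral-7b-instruct-v0.1.Q8_0.gguf",  # hypothetical path
    n_ctx=CONTEXT_WINDOW_SIZE,  # prompt + generated tokens must fit in this window
    max_tokens=MAX_NEW_TOKENS,  # cap on newly generated tokens
    n_batch=512,                # halve this first if the buffer error appears
)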
@@ -98,14 +98,14 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
 #### (FOR GGUF MODELS)
 ####
 
-MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
-MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
+# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
+# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
 
 # MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
 
-
-
+MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
 
 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
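The active GGUF checkpoint switches from Llama-2-13B-Chat at Q4_K_M to Mistral-7B-Instruct-v0.1 at Q8_0 (the commit message says "ministral", but the identifiers point to Mistral 7B). Q8_0 is a near-lossless but heavy quantization, roughly twice the file size of a Q4_K_M of the same model. A sketch of how such an ID/basename pair is commonly resolved to a local file, assuming the standard huggingface_hub API; the repo's actual download code is not in this diff:

# Sketch only: standard huggingface_hub call; the repo's loader may differ.
from huggingface_hub import hf_hub_download

MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_BASENAME)
print(model_path)  # local cache path to the downloaded GGUF file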
prompt_template_utils.py CHANGED

@@ -8,9 +8,9 @@ from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
 from langchain.memory.chat_message_histories import RedisChatMessageHistory
 
-message_history = RedisChatMessageHistory(
-    url="redis://localhost:6379/1", ttl=600, session_id="my-session"
-)
+# message_history = RedisChatMessageHistory(
+#     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
+# )
 
 # this is specific to Llama-2.
 
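The Redis-backed chat history is commented out rather than deleted, presumably because the Space has no Redis instance to connect to. For reference, re-enabling it and actually wiring it into the buffer memory would look roughly like this; the chat_memory hookup is an assumption, since this file never passed message_history to ConversationBufferMemory:

# Sketch only: values come from the commented-out block above; the
# chat_memory wiring is an assumption, not code from this commit.
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories import RedisChatMessageHistory

message_history = RedisChatMessageHistory(
    url="redis://localhost:6379/1", ttl=600, session_id="my-session"
)
memory = ConversationBufferMemory(
    input_key="question", memory_key="history", chat_memory=message_history
)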
@@ -89,7 +89,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
     )
     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
 
-    memory = ConversationBufferMemory(input_key="question", memory_key="history"
+    memory = ConversationBufferMemory(input_key="question", memory_key="history")
 
     return (
         prompt,
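As displayed, the old line 92 is missing its closing parenthesis, so this one-character change plausibly fixes the syntax error behind the Space's "Build error" status. A short usage sketch, assuming the function returns a (prompt, memory) pair and that llm and retriever are defined elsewhere; neither is part of this diff:

# Sketch only: `llm` and `retriever` are placeholders defined elsewhere.
from langchain.chains import RetrievalQA

prompt, memory = get_prompt_template()  # assuming the returned tuple is (prompt, memory)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt, "memory": memory},
)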
|