Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -41,14 +41,14 @@ def load_model():
|
|
| 41 |
repo_id="Mykes/med_phi3-mini-4k-GGUF",
|
| 42 |
filename="*Q4_K_M.gguf",
|
| 43 |
verbose=False,
|
| 44 |
-
n_ctx=
|
| 45 |
-
n_batch=
|
| 46 |
-
n_threads=
|
|
|
|
| 47 |
use_mmap=True,
|
| 48 |
-
# n_ctx=
|
| 49 |
-
# n_batch=
|
| 50 |
-
# n_threads=
|
| 51 |
-
# use_mlock=True,
|
| 52 |
# use_mmap=True,
|
| 53 |
)
|
| 54 |
|
|
@@ -110,7 +110,7 @@ if prompt := st.chat_input("What is your question?"):
|
|
| 110 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 111 |
|
| 112 |
# Format the context with the last 5 messages
|
| 113 |
-
context = format_context(st.session_state.messages[-
|
| 114 |
|
| 115 |
# Prepare the model input
|
| 116 |
model_input = f"{context}Human: {prompt}\nAssistant:"
|
|
|
|
| 41 |
repo_id="Mykes/med_phi3-mini-4k-GGUF",
|
| 42 |
filename="*Q4_K_M.gguf",
|
| 43 |
verbose=False,
|
| 44 |
+
n_ctx=512,
|
| 45 |
+
n_batch=512,
|
| 46 |
+
n_threads=8,
|
| 47 |
+
use_mlock=True,
|
| 48 |
use_mmap=True,
|
| 49 |
+
# n_ctx=256, # Reduced context window
|
| 50 |
+
# n_batch=8, # Smaller batch size
|
| 51 |
+
# n_threads=2, # Adjust based on your CPU cores
|
|
|
|
| 52 |
# use_mmap=True,
|
| 53 |
)
|
| 54 |
|
|
|
|
| 110 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 111 |
|
| 112 |
# Format the context with the last 3 messages
|
| 113 |
+
context = format_context(st.session_state.messages[-3:])
|
| 114 |
|
| 115 |
# Prepare the model input
|
| 116 |
model_input = f"{context}Human: {prompt}\nAssistant:"
|