Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,7 @@ import os
|
|
| 25 |
from io import StringIO
|
| 26 |
from llama_index.core.memory import ChatMemoryBuffer
|
| 27 |
|
| 28 |
-
memory = ChatMemoryBuffer.from_defaults(token_limit=
|
| 29 |
#
|
| 30 |
|
| 31 |
last = 0
|
|
@@ -82,7 +82,7 @@ client = InferenceClient(LLM_NAME)
|
|
| 82 |
Settings.llm = HuggingFaceInferenceAPI(model_name=LLM_NAME,
|
| 83 |
# device_map="auto",
|
| 84 |
# system_prompt = system_prompt,
|
| 85 |
-
context_window=
|
| 86 |
max_new_tokens=3072,
|
| 87 |
# stopping_ids=[50278, 50279, 50277, 1, 0],
|
| 88 |
generate_kwargs={"temperature": temperature, "top_p":top_p, "repetition_penalty": repetition_penalty,
|
|
|
|
| 25 |
from io import StringIO
|
| 26 |
from llama_index.core.memory import ChatMemoryBuffer
|
| 27 |
|
| 28 |
+
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
|
| 29 |
#
|
| 30 |
|
| 31 |
last = 0
|
|
|
|
| 82 |
Settings.llm = HuggingFaceInferenceAPI(model_name=LLM_NAME,
|
| 83 |
# device_map="auto",
|
| 84 |
# system_prompt = system_prompt,
|
| 85 |
+
context_window=4092,
|
| 86 |
max_new_tokens=3072,
|
| 87 |
# stopping_ids=[50278, 50279, 50277, 1, 0],
|
| 88 |
generate_kwargs={"temperature": temperature, "top_p":top_p, "repetition_penalty": repetition_penalty,
|