Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ import os
|
|
| 23 |
from io import StringIO
|
| 24 |
from llama_index.core.memory import ChatMemoryBuffer
|
| 25 |
|
| 26 |
-
memory = ChatMemoryBuffer.from_defaults(token_limit=
|
| 27 |
#
|
| 28 |
|
| 29 |
last = 0
|
|
@@ -80,7 +80,7 @@ client = InferenceClient(LLM_NAME)
|
|
| 80 |
Settings.llm = HuggingFaceInferenceAPI(model_name=LLM_NAME,
|
| 81 |
# device_map="auto",
|
| 82 |
# system_prompt = system_prompt,
|
| 83 |
-
context_window=
|
| 84 |
max_new_tokens=3072,
|
| 85 |
# stopping_ids=[50278, 50279, 50277, 1, 0],
|
| 86 |
generate_kwargs={"temperature": temperature, "top_p":top_p, "repetition_penalty": repetition_penalty,
|
|
|
|
| 23 |
from io import StringIO
|
| 24 |
from llama_index.core.memory import ChatMemoryBuffer
|
| 25 |
|
| 26 |
+
memory = ChatMemoryBuffer.from_defaults(token_limit=3072)
|
| 27 |
#
|
| 28 |
|
| 29 |
last = 0
|
|
|
|
| 80 |
Settings.llm = HuggingFaceInferenceAPI(model_name=LLM_NAME,
|
| 81 |
# device_map="auto",
|
| 82 |
# system_prompt = system_prompt,
|
| 83 |
+
context_window=8192,
|
| 84 |
max_new_tokens=3072,
|
| 85 |
# stopping_ids=[50278, 50279, 50277, 1, 0],
|
| 86 |
generate_kwargs={"temperature": temperature, "top_p":top_p, "repetition_penalty": repetition_penalty,
|