Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,15 +59,15 @@ def display_chat_history(chain):
|
|
| 59 |
def create_conversational_chain(vector_store):
|
| 60 |
load_dotenv()
|
| 61 |
# Create llm
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
llm = Replicate(
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 72 |
|
| 73 |
chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
|
|
|
|
| 59 |
def create_conversational_chain(vector_store):
|
| 60 |
load_dotenv()
|
| 61 |
# Create llm
|
| 62 |
+
llm = CTransformers(model="NousResearch/Llama-2-7b-hf",
|
| 63 |
+
streaming=True,
|
| 64 |
+
callbacks=[StreamingStdOutCallbackHandler()],
|
| 65 |
+
model_type="llama", config={'max_new_tokens': 500, 'temperature': 0.01})
|
| 66 |
+
# llm = Replicate(
|
| 67 |
+
# streaming = True,
|
| 68 |
+
# model = "NousResearch/Llama-2-7b-hf",
|
| 69 |
+
# callbacks=[StreamingStdOutCallbackHandler()],
|
| 70 |
+
# input = {"temperature": 0.01, "max_length" :500,"top_p":1})
|
| 71 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 72 |
|
| 73 |
chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
|