Spaces:

Hackoor
/

SampleLlamaModel-1

Runtime error

Hackoor committed on Sep 8, 2023

Commit

905c717

1 Parent(s): a51a087

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -59,15 +59,15 @@ def display_chat_history(chain):
 def create_conversational_chain(vector_store):
     load_dotenv()
     # Create llm
-    #llm = CTransformers(model="llama-2-7b-chat.ggmlv3.q4_0.bin",
-                        #streaming=True,
-                        #callbacks=[StreamingStdOutCallbackHandler()],
-                        #model_type="llama", config={'max_new_tokens': 500, 'temperature': 0.01})
-    llm = Replicate(
-        streaming = True,
-        model = "NousResearch/Llama-2-7b-hf",
-        callbacks=[StreamingStdOutCallbackHandler()],
-        input = {"temperature": 0.01, "max_length" :500,"top_p":1})
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',

 def create_conversational_chain(vector_store):
     load_dotenv()
     # Create llm
+    llm = CTransformers(model="NousResearch/Llama-2-7b-hf",
+                        streaming=True,
+                        callbacks=[StreamingStdOutCallbackHandler()],
+                        model_type="llama", config={'max_new_tokens': 500, 'temperature': 0.01})
+    # llm = Replicate(
+    #     streaming = True,
+    #     model = "NousResearch/Llama-2-7b-hf",
+    #     callbacks=[StreamingStdOutCallbackHandler()],
+    #     input = {"temperature": 0.01, "max_length" :500,"top_p":1})
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',