Spaces:

umarigan
/

AssistAI

Runtime error

umarigan committed on Aug 1, 2023

Commit

c54e666

1 Parent(s): 2cdfc91

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,16 @@ def load_model():
   logging.info("uploading model from hf pub")
   #model_path = '/content/llama.cpp/models/llama-2-7b-chat.ggmlv3.q4_K_M.bin'
   llm = LlamaCpp(model_path=model_path, n_ctx=4096)
-  llm_chain = LLMChain(llm=llm, prompt=prompt)
   #llm_chain = ConversationChain(llm=llm, prompt=promptmemory=ConversationBufferMemory())
   logging.info("uploading model done")
   return  llm_chain

   logging.info("uploading model from hf pub")
   #model_path = '/content/llama.cpp/models/llama-2-7b-chat.ggmlv3.q4_K_M.bin'
   llm = LlamaCpp(model_path=model_path, n_ctx=4096)
+  #llm_chain = LLMChain(llm=llm, prompt=prompt)
+  n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.
+  n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+  llm = LlamaCpp(model_path=model_path, n_ctx=2048,
+               input={"temperature": 0.75, "max_length": 2000, "top_p": 1},
+               callback_manager=callback_manager,
+               n_gpu_layers=n_gpu_layers,
+                n_batch=n_batch,
+               verbose=True,)
   #llm_chain = ConversationChain(llm=llm, prompt=promptmemory=ConversationBufferMemory())
   logging.info("uploading model done")
   return  llm_chain