Spaces:

kat33
/

llama.cpp

Runtime error

kat33 committed on Jul 26, 2023

Commit

e721849

1 Parent(s): cd5e755

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,13 +2,27 @@ import gradio as gr
 #import transformers
 #from transformers import pipeline
 from llama_cpp import Llama
-model="TheBloke/Nous-Hermes-13B-GGML"
-model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"
 def question_answer(context, question):
     text=context + "\n\nQuestion: \"\"\"\n" + question + "\nPlease use markdown formatting for answer. \nAnswer:\n"
-    llm = Llama(model_path=model)
     output = llm(text, max_tokens=33, stop=["### Response", "\n"], echo=True)
     print(output)
     return output.choices[0].text

 #import transformers
 #from transformers import pipeline
 from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+model_repo="TheBloke/Nous-Hermes-13B-GGML"
+model_filename="nous-hermes-13b.ggmlv3.q4_K_S.bin"
+#model="TheBloke/Nous-Hermes-13B-GGML"
+#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"
+def download_model():
+    """Fetch the model file from the Hugging Face Hub and return its local path.
+
+    The repository and file name come from the module-level ``model_repo``
+    and ``model_filename`` settings. ``hf_hub_download`` keeps a local cache,
+    so a file that is already on disk is reused rather than fetched again.
+    """
+    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
+    return hf_hub_download(repo_id=model_repo, filename=model_filename)
 def question_answer(context, question):
+    """Answer ``question`` about ``context`` with the llama.cpp model.
+
+    Builds one prompt from the context and the question, runs a single
+    completion, prints the raw response for debugging, and returns the
+    text of the first choice.
+    """
+    mfile=download_model()
     text=context + "\n\nQuestion: \"\"\"\n" + question + "\nPlease use markdown formatting for answer. \nAnswer:\n"
+    llm = Llama(model_path=mfile)
     output = llm(text, max_tokens=33, stop=["### Response", "\n"], echo=True)
     print(output)
+    # llama-cpp-python returns a plain dict shaped like an OpenAI completion,
+    # so fields are read by key; attribute access raises AttributeError.
+    return output["choices"][0]["text"]