Spaces:

theodotus
/

gemma-2b-uk

Sleeping

theodotus committed on Apr 7, 2024

Commit

aa49098

1 Parent(s): 1db0c3f

Use zephyr chat format

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ from llama_cpp import Llama
 llm = Llama(
     model_path="gemma-2b-uk.gguf",
-    chat_format="gemma",
     n_threads=2,
     n_threads_batch=2,
 )
@@ -15,20 +14,10 @@ llm = Llama(
 def convert_history(message, history):
-    chat_history = []
     for block in history[-1:]:
-        chat_history.append({
-            "role": "user",
-            "content": block[0]
-        })
-        chat_history.append({
-            "role": "model",
-            "content": block[1]
-        })
-    chat_history.append({
-        "role": "user",
-        "content": message
-    })
     return chat_history
@@ -45,11 +34,9 @@ def ask(message, history):
     response = ""
     for chunk in chunks:
-        delta = chunk["choices"][0]["delta"]
-        if "content" not in delta:
-            continue
-        print(delta["content"])
-        response += delta["content"]
         yield response

 llm = Llama(
     model_path="gemma-2b-uk.gguf",
     n_threads=2,
     n_threads_batch=2,
 )
 def convert_history(message, history):
+    chat_history = ""
     for block in history[-1:]:
+        chat_history += f"<|user|>\n{block[0]}<eos>\n<|assistant|>\n{block[1]}<eos>\n"
+    chat_history += f"<|user|>\n{message}<eos>\n<|assistant|>\n"
     return chat_history
     response = ""
     for chunk in chunks:
+        delta = chunk["choices"][0]["text"]
+        print(delta)
+        response += delta
         yield response