hermes3-llama-cpp

Running

Jodaro committed about 24 hours ago

Commit

807809c

verified ·

1 Parent(s): de96a1d

Fix gradio launch and mistral prompt formatting

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ from ctransformers import AutoModelForCausalLM
 MODEL_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
 MODEL_FILE = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
-print("Loading model...")
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     model_file=MODEL_FILE,
@@ -13,23 +12,25 @@ llm = AutoModelForCausalLM.from_pretrained(
     context_length=2048,
 )
-def respond(message: str, history: list[list[str]]) -> str:
     prompt = ""
     for user_msg, bot_msg in history:
-        prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
-        prompt += f"<|im_start|>assistant\n{bot_msg}\n<|im_end|>\n"
-    prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
     out = llm(
         prompt,
         max_new_tokens=256,
         temperature=0.7,
         top_p=0.9,
-        stop=["<|im_end|>"],
     )
-    return out["text"]
 if __name__ == "__main__":
-    gr.ChatInterface(respond).launch()

 MODEL_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
 MODEL_FILE = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     model_file=MODEL_FILE,
     context_length=2048,
 )
+def respond(message: str, history: list[tuple[str, str]]):
     prompt = ""
     for user_msg, bot_msg in history:
+        prompt += f"[INST] {user_msg} [/INST] {bot_msg}</s>"
+    prompt += f"[INST] {message} [/INST]"
     out = llm(
         prompt,
         max_new_tokens=256,
         temperature=0.7,
         top_p=0.9,
+        stop=["</s>"],
     )
+    if isinstance(out, dict) and "text" in out:
+        return out["text"]
+    return str(out)
+demo = gr.ChatInterface(respond)
 if __name__ == "__main__":
+    demo.launch()