hermes3-llama-cpp

Running

Jodaro committed about 23 hours ago

Commit

398f222

verified ·

1 Parent(s): 863eb49

Switch to TinyLlama 1.1B Chat Q4_K_M

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,22 +1,22 @@
 import gradio as gr
 from ctransformers import AutoModelForCausalLM
-MODEL_REPO = "hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF"
-MODEL_FILE = "llama-3.2-3b-instruct-q4_k_m.gguf"
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     model_file=MODEL_FILE,
     model_type="llama",
     gpu_layers=0,
-    context_length=8192,
 )
 def respond(message: str, history: list[tuple[str, str]]):
     prompt = ""
     for user_msg, bot_msg in history:
-        prompt += f"[INST] {user_msg} [/INST] {bot_msg}\n"
-    prompt += f"[INST] {message} [/INST]"
     out = llm(
         prompt,
@@ -30,7 +30,6 @@ def respond(message: str, history: list[tuple[str, str]]):
     return str(out)
 demo = gr.ChatInterface(respond)
 if __name__ == "__main__":

 import gradio as gr
 from ctransformers import AutoModelForCausalLM
+MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+MODEL_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
     model_file=MODEL_FILE,
     model_type="llama",
     gpu_layers=0,
+    context_length=4096,
 )
 def respond(message: str, history: list[tuple[str, str]]):
     prompt = ""
     for user_msg, bot_msg in history:
+        prompt += f"[INST]\n{user_msg}\n[/INST]\n{bot_msg}\n"
+    prompt += f"[INST]\n{message}\n[/INST]"
     out = llm(
         prompt,
     return str(out)
 demo = gr.ChatInterface(respond)
 if __name__ == "__main__":