Jodaro committed on
Commit
689f1fc
·
verified ·
1 Parent(s): 1e86ec8

Switch to llama_cpp

Browse files
Files changed (1) hide show
  1. app.py +27 -13
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import gradio as gr
2
- from ctransformers import AutoModelForCausalLM
 
3
 
4
  MODEL_REPO = "bartowski/Hermes-3-Llama-3.1-8B-GGUF"
5
  MODEL_FILE = "Hermes-3-Llama-3.1-8B-Q4_K_M.gguf"
6
 
 
 
 
7
  print("Loading model...")
8
- model = AutoModelForCausalLM.from_pretrained(
9
- MODEL_REPO,
10
- model_file=MODEL_FILE,
11
- model_type="llama",
12
- gpu_layers=0,
13
- context_length=4096,
14
- )
15
-
16
- def respond(message, history):
17
  prompt = ""
18
  for user_msg, bot_msg in history:
19
  prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
@@ -21,10 +23,22 @@ def respond(message, history):
21
 
22
  prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
23
 
24
- output = model(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9)
25
- return output
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  iface = gr.ChatInterface(respond)
28
 
29
  if __name__ == "__main__":
30
- iface.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ from llama_cpp import Llama
4
 
5
  MODEL_REPO = "bartowski/Hermes-3-Llama-3.1-8B-GGUF"
6
  MODEL_FILE = "Hermes-3-Llama-3.1-8B-Q4_K_M.gguf"
7
 
8
+ print("Downloading model...")
9
+ model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
10
+
11
  print("Loading model...")
12
+ llm = Llama(model_path=model_path, n_ctx=4096, n_threads=2)
13
+
14
+ STOP_TOKENS = ["<|im_end|>"]
15
+
16
+
17
+ def respond(message: str, history: list[list[str]]) -> str:
18
+ # Build the prompt from the conversation history
 
 
19
  prompt = ""
20
  for user_msg, bot_msg in history:
21
  prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
 
23
 
24
  prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
25
 
26
+ out = llm(
27
+ prompt,
28
+ max_tokens=512,
29
+ temperature=0.7,
30
+ top_p=0.9,
31
+ stop=STOP_TOKENS,
32
+ )
33
+
34
+ text = out["choices"][0]["text"]
35
+ # remove any stop token
36
+ for s in STOP_TOKENS:
37
+ text = text.split(s)[0]
38
+ return text
39
+
40
 
41
  iface = gr.ChatInterface(respond)
42
 
43
  if __name__ == "__main__":
44
+ iface.launch()