Jodaro committed on
Commit
398f222
·
verified ·
1 Parent(s): 863eb49

Switch to TinyLlama 1.1B Chat Q4_K_M

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -1,22 +1,22 @@
1
  import gradio as gr
2
  from ctransformers import AutoModelForCausalLM
3
 
4
- MODEL_REPO = "hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF"
5
- MODEL_FILE = "llama-3.2-3b-instruct-q4_k_m.gguf"
6
 
7
  llm = AutoModelForCausalLM.from_pretrained(
8
  MODEL_REPO,
9
  model_file=MODEL_FILE,
10
  model_type="llama",
11
  gpu_layers=0,
12
- context_length=8192,
13
  )
14
 
15
  def respond(message: str, history: list[tuple[str, str]]):
16
  prompt = ""
17
  for user_msg, bot_msg in history:
18
- prompt += f"[INST] {user_msg} [/INST] {bot_msg}\n"
19
- prompt += f"[INST] {message} [/INST]"
20
 
21
  out = llm(
22
  prompt,
@@ -30,7 +30,6 @@ def respond(message: str, history: list[tuple[str, str]]):
30
 
31
  return str(out)
32
 
33
-
34
  demo = gr.ChatInterface(respond)
35
 
36
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from ctransformers import AutoModelForCausalLM
3
 
4
+ MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
5
+ MODEL_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
6
 
7
  llm = AutoModelForCausalLM.from_pretrained(
8
  MODEL_REPO,
9
  model_file=MODEL_FILE,
10
  model_type="llama",
11
  gpu_layers=0,
12
+ context_length=4096,
13
  )
14
 
15
  def respond(message: str, history: list[tuple[str, str]]):
16
  prompt = ""
17
  for user_msg, bot_msg in history:
18
+ prompt += f"[INST]\n{user_msg}\n[/INST]\n{bot_msg}\n"
19
+ prompt += f"[INST]\n{message}\n[/INST]"
20
 
21
  out = llm(
22
  prompt,
 
30
 
31
  return str(out)
32
 
 
33
  demo = gr.ChatInterface(respond)
34
 
35
  if __name__ == "__main__":