Jodaro committed on
Commit
de96a1d
·
verified ·
1 Parent(s): 0e9e41e

Switch to Mistral 7B GGUF

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -1,14 +1,14 @@
1
  import gradio as gr
2
  from ctransformers import AutoModelForCausalLM
3
 
4
- MODEL_REPO = "Qwen/Qwen3-4B-GGUF"
5
- MODEL_FILE = "Qwen3-4B-Q4_K_M.gguf"
6
 
7
  print("Loading model...")
8
  llm = AutoModelForCausalLM.from_pretrained(
9
  MODEL_REPO,
10
  model_file=MODEL_FILE,
11
- model_type="qwen",
12
  gpu_layers=0,
13
  context_length=2048,
14
  )
@@ -18,15 +18,18 @@ def respond(message: str, history: list[list[str]]) -> str:
18
  for user_msg, bot_msg in history:
19
  prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
20
  prompt += f"<|im_start|>assistant\n{bot_msg}\n<|im_end|>\n"
 
21
  prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
 
22
  out = llm(
23
  prompt,
24
- max_new_tokens=512,
25
  temperature=0.7,
26
  top_p=0.9,
27
  stop=["<|im_end|>"],
28
  )
29
- return out
 
30
 
31
  if __name__ == "__main__":
32
- gr.ChatInterface(respond).launch()
 
1
  import gradio as gr
2
  from ctransformers import AutoModelForCausalLM
3
 
4
+ MODEL_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
5
+ MODEL_FILE = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
6
 
7
  print("Loading model...")
8
  llm = AutoModelForCausalLM.from_pretrained(
9
  MODEL_REPO,
10
  model_file=MODEL_FILE,
11
+ model_type="mistral",
12
  gpu_layers=0,
13
  context_length=2048,
14
  )
 
18
  for user_msg, bot_msg in history:
19
  prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
20
  prompt += f"<|im_start|>assistant\n{bot_msg}\n<|im_end|>\n"
21
+
22
  prompt += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"
23
+
24
  out = llm(
25
  prompt,
26
+ max_new_tokens=256,
27
  temperature=0.7,
28
  top_p=0.9,
29
  stop=["<|im_end|>"],
30
  )
31
+ return out["text"]
32
+
33
 
34
  if __name__ == "__main__":
35
+ gr.ChatInterface(respond).launch()