Goated121 committed on
Commit
12df875
·
verified ·
1 Parent(s): 2a19c62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -1,20 +1,27 @@
1
  from llama_cpp import Llama
2
  import gradio as gr
3
 
 
4
  model = Llama(
5
  model_path="qwen2.5-1.5B-q4.gguf",
6
  n_ctx=4096,
7
  n_gpu_layers=0,
8
- chat_format="qwen2" # needed for Qwen2 models
9
  )
10
 
11
- def chat(prompt):
12
- out = model(
13
- prompt,
 
 
 
 
 
14
  max_tokens=256,
15
  temperature=0.7,
16
  )
17
- return out["choices"][0]["text"]
 
18
 
19
  gr.Interface(
20
  fn=chat,
 
1
  from llama_cpp import Llama
2
  import gradio as gr
3
 
# NOTE: llama-cpp-python registers the Qwen chat template under the name
# "qwen" — passing "qwen2" is rejected, so "qwen" is used here.
model = Llama(
    model_path="qwen2.5-1.5B-q4.gguf",  # quantized Qwen2.5 1.5B weights
    n_ctx=4096,        # context window (tokens)
    n_gpu_layers=0,    # CPU-only inference
    chat_format="qwen",
)
12
def chat(user_input):
    """Return the model's reply to *user_input* via the chat-completion API.

    A strict system prompt is prepended so the model answers only the
    question instead of continuing the text or asking follow-ups.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant. Answer ONLY the question. Do NOT continue, do NOT ask questions, do NOT add extra text."},
        {"role": "user", "content": user_input},
    ]

    completion = model.create_chat_completion(
        messages=conversation,
        max_tokens=256,     # cap reply length
        temperature=0.7,    # mild sampling randomness
    )

    # create_chat_completion returns an OpenAI-style response dict;
    # extract the assistant message text from the first choice.
    return completion["choices"][0]["message"]["content"]
25
 
26
  gr.Interface(
27
  fn=chat,