skullcandy42 committed on
Commit
874d8b7
·
verified ·
1 Parent(s): cf8f716

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -27
app.py CHANGED
@@ -1,63 +1,110 @@
 
 
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  CSS = """
6
  #qwen-md .katex-display { display: inline; }
 
 
7
  """
8
 
9
- # 下载GGUF模型文件(如果本地已下载,此代码会跳过)
10
  hf_hub_download(
11
  repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
12
  filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
13
  local_dir="./models",
14
  )
15
 
16
- # 仅CPU模式加载模型,去除flash attention和显存优化参数
17
  llm = Llama(
18
  model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
19
- n_ctx=2048, # 更短的上下文提高速度
20
- n_batch=256, # 小批量减少CPU负载
21
- n_threads=8, # 明确线程数 (可根据你的CPU核心数调整)
22
  chat_format="chatml",
23
- verbose=False
24
  )
25
 
26
- # Gradio 组件定义(简化配置)
27
- input_text = gr.Textbox(label="Ask math questions here")
28
- output_md = gr.Markdown(label="Answer", elem_id="qwen-md", show_copy_button=True)
29
- target_lang = gr.Dropdown(choices=["Chinese", "English"], value="Chinese", label="Output Language")
30
- new_tokens = gr.Slider(minimum=1, maximum=512, value=256, step=1, label="Max new tokens")
31
- temperature = gr.Slider(minimum=0, maximum=1.0, value=0.2, step=0.05, label="Temperature")
32
- top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top P")
33
- submit_btn = gr.Button("Ask")
34
- banner = gr.Markdown(value="### 📖 **Qwen2.5-Math 7B GGUF** - Optimized for CPU")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # 响应函数(精简版,更快速)
37
- def respond(input_text, lang, max_tokens, temperature, top_p):
38
- sys_msg = "你是一个乐于助人的数学助手,请使用中文回答。" if lang == "Chinese" else "You are a helpful math assistant. Please answer in English."
 
 
 
 
 
 
 
 
 
39
  messages = [
40
- {"role": "system", "content": sys_msg},
 
 
 
41
  {"role": "user", "content": input_text},
42
  ]
43
 
44
- stream_response = llm.create_chat_completion(
 
45
  messages=messages,
46
  stream=True,
47
  max_tokens=max_tokens,
48
  temperature=temperature,
49
  top_p=top_p,
50
  )
 
 
 
 
 
 
51
 
52
- result = ""
53
- for chunk in stream_response:
54
- content = chunk['choices'][0]["delta"].get("content", "")
55
- result += content
56
- yield result.strip()
57
 
58
  with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
59
  submit_btn.click(
60
- respond,
61
  inputs=[input_text, target_lang, new_tokens, temperature, top_p],
62
  outputs=output_md,
63
  )
@@ -75,4 +122,4 @@ with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
75
  output_md.render()
76
 
77
  if __name__ == "__main__":
78
- demo.launch()
 
1
+ import json
2
+ import spaces
3
+ import subprocess
4
  import gradio as gr
5
  from llama_cpp import Llama
6
  from huggingface_hub import hf_hub_download
7
 
8
# Force KaTeX display-math nodes to render inline so formulas flow with the
# surrounding answer text in the #qwen-md Markdown component.
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""

# Pull the quantized GGUF checkpoint from the Hub into ./models.
# hf_hub_download is a no-op (cache hit) when the file is already present.
hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)
19
 
 
20
# Load the quantized Qwen2.5-Math model via llama-cpp-python.
# chat_format="chatml" matches Qwen's prompt template; n_ctx=8192 leaves room
# for long multi-step solutions.
llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    flash_attn=True,
    n_ctx=8192,
    n_batch=1024,
    chat_format="chatml",
)
27
 
28
# Gradio components (render=False: they are placed manually inside the Blocks
# layout later in the file).
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    # Delimiter pairs KaTeX should treat as display math in the model output.
    # NOTE: the previous "\\begin\{equation\}" forms used the invalid escape
    # sequence "\{" (SyntaxWarning on Python 3.12+) and left a literal
    # backslash before the brace, so they never matched real LaTeX like
    # \begin{equation}; braces need no escaping in Python strings.
    latex_delimiters=[
        {"left": "\\(", "right": "\\)", "display": True},
        {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
        {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
        {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
        {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
        {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    elem_id="qwen-md",
    show_copy_button=True,
    container=True,
    render=False,
)
target_lang = gr.Dropdown(
    choices=["Chinese", "English"],
    value="Chinese",
    label="Output Language",
    interactive=True,
    render=False,
)
new_tokens = gr.Slider(
    minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
)
temperature = gr.Slider(
    minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
)
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
"""
)
67
+
68
 
69
# Gradio callback: streams the model's answer into the Markdown output.
def respond(
    input_text,
    lang="Chinese",
    max_tokens=2048,
    temperature=0.5,
    top_p=0.95,
):
    """Generate a streamed chat completion for a math question.

    Args:
        input_text: The user's question text.
        lang: "Chinese" or "English"; selects the system-prompt language.
        max_tokens: Cap on the number of generated tokens.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Yields:
        The accumulated answer text after each streamed chunk, so the bound
        Gradio component updates incrementally.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."

    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]

    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    answer = ""
    for chunk in stream:
        # Deltas without a "content" key (e.g. the role-only first chunk)
        # carry no text; .get() replaces the previous verbose
        # len(delta)/membership check and the dead `response = ""` binding.
        content = chunk["choices"][0]["delta"].get("content")
        if content is not None:
            answer += content
            yield answer
103
 
 
 
 
 
 
104
 
105
  with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
106
  submit_btn.click(
107
+ fn=respond,
108
  inputs=[input_text, target_lang, new_tokens, temperature, top_p],
109
  outputs=output_md,
110
  )
 
122
  output_md.render()
123
 
124
  if __name__ == "__main__":
125
+ demo.launch()