M-hv1 committed on
Commit
c9b16c8
·
verified ·
1 Parent(s): dbccc7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -24
app.py CHANGED
@@ -4,17 +4,17 @@ import copy
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
 
7
-
8
  llm = Llama(
9
  model_path=hf_hub_download(
10
- repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
11
- filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
12
  ),
13
  n_ctx=2048,
14
- n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
 
15
  )
16
 
17
-
18
  def generate_text(
19
  message,
20
  history: list[tuple[str, str]],
@@ -24,11 +24,13 @@ def generate_text(
24
  top_p,
25
  ):
26
  temp = ""
27
- input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
 
 
28
  for interaction in history:
29
- input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
30
 
31
- input_prompt = input_prompt + str(message) + " [/INST] "
32
 
33
  output = llm(
34
  input_prompt,
@@ -38,12 +40,8 @@ def generate_text(
38
  repeat_penalty=1.1,
39
  max_tokens=max_tokens,
40
  stop=[
41
- "<|prompter|>",
42
  "<|endoftext|>",
43
- "<|endoftext|> \n",
44
- "ASSISTANT:",
45
- "USER:",
46
- "SYSTEM:",
47
  ],
48
  stream=True,
49
  )
@@ -52,24 +50,21 @@ def generate_text(
52
  temp += stream["choices"][0]["text"]
53
  yield temp
54
 
55
-
56
  demo = gr.ChatInterface(
57
  generate_text,
58
- title="llama-cpp-python on GPU",
59
- description="Running LLM with https://github.com/abetlen/llama-cpp-python",
60
  examples=[
61
- ['How to setup a human base on Mars? Give short answer.'],
62
- ['Explain theory of relativity to me like I’m 8 years old.'],
63
- ['What is 9,000 * 9,000?'],
64
- ['Write a pun-filled happy birthday message to my friend Alex.'],
65
- ['Justify why a penguin might make a good king of the jungle.']
66
  ],
67
  cache_examples=False,
68
  retry_btn=None,
69
  undo_btn="Delete Previous",
70
  clear_btn="Clear",
71
  additional_inputs=[
72
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
73
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
74
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
75
  gr.Slider(
@@ -82,7 +77,5 @@ demo = gr.ChatInterface(
82
  ],
83
  )
84
 
85
-
86
  if __name__ == "__main__":
87
  demo.launch()
88
-
 
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
 
7
+ # Model setup (Qwen is pinned directly to avoid errors)
8
  llm = Llama(
9
  model_path=hf_hub_download(
10
+ repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
11
+ filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
12
  ),
13
  n_ctx=2048,
14
+ n_gpu_layers=0, # set to 0 so it runs stably on CPU
15
+ verbose=False
16
  )
17
 
 
18
  def generate_text(
19
  message,
20
  history: list[tuple[str, str]],
 
24
  top_p,
25
  ):
26
  temp = ""
27
+
28
+ # Adjust the prompt format to suit Qwen (ChatML format)
29
+ input_prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n"
30
  for interaction in history:
31
+ input_prompt += f"<|im_start|>user\n{interaction[0]}<|im_end|>\n<|im_start|>assistant\n{interaction[1]}<|im_end|>\n"
32
 
33
+ input_prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
34
 
35
  output = llm(
36
  input_prompt,
 
40
  repeat_penalty=1.1,
41
  max_tokens=max_tokens,
42
  stop=[
43
+ "<|im_end|>",
44
  "<|endoftext|>",
 
 
 
 
45
  ],
46
  stream=True,
47
  )
 
50
  temp += stream["choices"][0]["text"]
51
  yield temp
52
 
 
53
  demo = gr.ChatInterface(
54
  generate_text,
55
+ title="Qwen 2.5 (1.5B) - Fast Server",
56
+ description="Running Qwen 2.5 on CPU via llama.cpp",
57
  examples=[
58
+ ['Hello, introduce yourself.'],
59
+ ['Explain quantum physics simply.'],
60
+ ['Write a python code to sum two numbers.']
 
 
61
  ],
62
  cache_examples=False,
63
  retry_btn=None,
64
  undo_btn="Delete Previous",
65
  clear_btn="Clear",
66
  additional_inputs=[
67
+ gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
68
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
69
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
70
  gr.Slider(
 
77
  ],
78
  )
79
 
 
80
  if __name__ == "__main__":
81
  demo.launch()