M-hv1 committed on
Commit
c9b16c8
·
verified ·
1 Parent(s): dbccc7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -24
app.py CHANGED
@@ -4,17 +4,17 @@ import copy
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
 
7
-
8
  llm = Llama(
9
  model_path=hf_hub_download(
10
- repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
11
- filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
12
  ),
13
  n_ctx=2048,
14
- n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
 
15
  )
16
 
17
-
18
  def generate_text(
19
  message,
20
  history: list[tuple[str, str]],
@@ -24,11 +24,13 @@ def generate_text(
24
  top_p,
25
  ):
26
  temp = ""
27
- input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
 
 
28
  for interaction in history:
29
- input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
30
 
31
- input_prompt = input_prompt + str(message) + " [/INST] "
32
 
33
  output = llm(
34
  input_prompt,
@@ -38,12 +40,8 @@ def generate_text(
38
  repeat_penalty=1.1,
39
  max_tokens=max_tokens,
40
  stop=[
41
- "<|prompter|>",
42
  "<|endoftext|>",
43
- "<|endoftext|> \n",
44
- "ASSISTANT:",
45
- "USER:",
46
- "SYSTEM:",
47
  ],
48
  stream=True,
49
  )
@@ -52,24 +50,21 @@ def generate_text(
52
  temp += stream["choices"][0]["text"]
53
  yield temp
54
 
55
-
56
  demo = gr.ChatInterface(
57
  generate_text,
58
- title="llama-cpp-python on GPU",
59
- description="Running LLM with https://github.com/abetlen/llama-cpp-python",
60
  examples=[
61
- ['How to setup a human base on Mars? Give short answer.'],
62
- ['Explain theory of relativity to me like I’m 8 years old.'],
63
- ['What is 9,000 * 9,000?'],
64
- ['Write a pun-filled happy birthday message to my friend Alex.'],
65
- ['Justify why a penguin might make a good king of the jungle.']
66
  ],
67
  cache_examples=False,
68
  retry_btn=None,
69
  undo_btn="Delete Previous",
70
  clear_btn="Clear",
71
  additional_inputs=[
72
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
73
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
74
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
75
  gr.Slider(
@@ -82,7 +77,5 @@ demo = gr.ChatInterface(
82
  ],
83
  )
84
 
85
-
86
  if __name__ == "__main__":
87
  demo.launch()
88
-
 
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
 
7
+ # Model setup (Qwen is pinned directly to avoid errors)
8
  llm = Llama(
9
  model_path=hf_hub_download(
10
+ repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
11
+ filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
12
  ),
13
  n_ctx=2048,
14
+ n_gpu_layers=0, # set to 0 so it runs stably on CPU
15
+ verbose=False
16
  )
17
 
 
18
  def generate_text(
19
  message,
20
  history: list[tuple[str, str]],
 
24
  top_p,
25
  ):
26
  temp = ""
27
+
28
+ # Adjust the prompt format to suit Qwen (ChatML format)
29
+ input_prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n"
30
  for interaction in history:
31
+ input_prompt += f"<|im_start|>user\n{interaction[0]}<|im_end|>\n<|im_start|>assistant\n{interaction[1]}<|im_end|>\n"
32
 
33
+ input_prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
34
 
35
  output = llm(
36
  input_prompt,
 
40
  repeat_penalty=1.1,
41
  max_tokens=max_tokens,
42
  stop=[
43
+ "<|im_end|>",
44
  "<|endoftext|>",
 
 
 
 
45
  ],
46
  stream=True,
47
  )
 
50
  temp += stream["choices"][0]["text"]
51
  yield temp
52
 
 
53
  demo = gr.ChatInterface(
54
  generate_text,
55
+ title="Qwen 2.5 (1.5B) - Fast Server",
56
+ description="Running Qwen 2.5 on CPU via llama.cpp",
57
  examples=[
58
+ ['Hello, introduce yourself.'],
59
+ ['Explain quantum physics simply.'],
60
+ ['Write a python code to sum two numbers.']
 
 
61
  ],
62
  cache_examples=False,
63
  retry_btn=None,
64
  undo_btn="Delete Previous",
65
  clear_btn="Clear",
66
  additional_inputs=[
67
+ gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
68
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
69
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
70
  gr.Slider(
 
77
  ],
78
  )
79
 
 
80
  if __name__ == "__main__":
81
  demo.launch()