import os
import gradio as gr
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Model setup (Qwen pinned directly to avoid errors).
# NOTE: hf_hub_download fetches the GGUF weights at import time (network I/O
# on first run; cached afterwards), then llama.cpp loads them into memory.
llm = Llama(
    model_path=hf_hub_download(
        repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
        filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
    ),
    n_ctx=2048,      # context window, in tokens
    n_gpu_layers=0,  # set to 0 so it runs stably on CPU-only hardware
    verbose=False
)
def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the local Qwen model.

    Builds a ChatML-format prompt (the template Qwen expects) from the
    system message, the prior ``(user, assistant)`` turns, and the new
    user message, then yields the accumulated response text chunk by
    chunk so Gradio can render it as a live stream.

    Args:
        message: The new user message.
        history: Prior turns as (user, assistant) string pairs.
        system_message: System prompt placed at the top of the context.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The full response text generated so far (monotonically growing).
    """
    # Assemble the prompt with a list + join instead of repeated string
    # concatenation.
    parts = [f"<|im_start|>system\n{system_message}<|im_end|>\n"]
    for user_turn, assistant_turn in history:
        parts.append(
            f"<|im_start|>user\n{user_turn}<|im_end|>\n"
            f"<|im_start|>assistant\n{assistant_turn}<|im_end|>\n"
        )
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    input_prompt = "".join(parts)

    output = llm(
        input_prompt,
        temperature=temperature,
        top_p=top_p,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=max_tokens,
        stop=[
            "<|im_end|>",
            "<|endoftext|>",
        ],
        stream=True,
    )

    response = ""
    for chunk in output:
        # Read the new token text directly. The original deep-copied every
        # streamed chunk dict before indexing into it, which added per-token
        # cost without changing behavior.
        response += chunk["choices"][0]["text"]
        yield response
# Chat UI wired to generate_text. The retry_btn/undo_btn/clear_btn keyword
# arguments were removed from gr.ChatInterface in Gradio >= 4.44 / 5.x and
# raise TypeError at startup (a likely cause of a Space build error), so they
# are dropped here; ChatInterface renders its own retry/undo/clear controls.
demo = gr.ChatInterface(
    generate_text,
    title="Qwen 2.5 (1.5B) - Fast Server",
    description="Running Qwen 2.5 on CPU via llama.cpp",
    examples=[
        ['Hello, introduce yourself.'],
        ['Explain quantum physics simply.'],
        ['Write a python code to sum two numbers.']
    ],
    cache_examples=False,
    # Extra controls shown under the chat box; order must match the
    # system_message / max_tokens / temperature / top_p parameters of
    # generate_text.
    additional_inputs=[
        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()