Update app.py
Browse files
app.py
CHANGED
|
@@ -15,24 +15,23 @@ logging.basicConfig(level=logging.INFO)
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
# ====================== 模型配置 ======================
|
| 18 |
-
# 使用 Hugging Face 的
|
| 19 |
-
# 这里以 4B 模型的 Q4_K_M 版本为例,你可以根据需要修改
|
| 20 |
REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
|
| 21 |
FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
|
| 22 |
-
MODEL_ID = "qwen3.5-4b" #
|
| 23 |
|
| 24 |
-
# 加载模型
|
| 25 |
logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
|
| 26 |
llm = Llama.from_pretrained(
|
| 27 |
repo_id=REPO_ID,
|
| 28 |
filename=FILENAME,
|
| 29 |
-
n_ctx=4096, # 上下文窗口,可根据需
|
| 30 |
n_threads=None, # 自动使用所有 CPU 线程
|
| 31 |
verbose=False,
|
| 32 |
)
|
| 33 |
logger.info("模型加载完成!")
|
| 34 |
|
| 35 |
-
app = FastAPI(title="Qwen3.5 GGUF API (CoPaw兼容)")
|
| 36 |
|
| 37 |
# ====================== CORS 中间件 ======================
|
| 38 |
app.add_middleware(
|
|
@@ -50,7 +49,12 @@ async def health():
|
|
| 50 |
|
| 51 |
@app.get("/v1/me")
|
| 52 |
async def get_me():
|
| 53 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
@app.get("/v1/dashboard/bots")
|
| 56 |
async def get_bots():
|
|
@@ -58,7 +62,17 @@ async def get_bots():
|
|
| 58 |
|
| 59 |
@app.get("/v1/models")
|
| 60 |
async def list_models():
|
| 61 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# ====================== 请求/响应数据模型 ======================
|
| 64 |
class Message(BaseModel):
|
|
@@ -81,7 +95,10 @@ def convert_content_to_str(content: Optional[Union[str, List[Dict[str, Any]]]])
|
|
| 81 |
if isinstance(content, str):
|
| 82 |
return content
|
| 83 |
if isinstance(content, list):
|
| 84 |
-
texts = [
|
|
|
|
|
|
|
|
|
|
| 85 |
return "\n".join(texts)
|
| 86 |
return str(content)
|
| 87 |
|
|
@@ -91,11 +108,14 @@ async def chat_completions(req: ChatRequest):
|
|
| 91 |
# 转换消息格式
|
| 92 |
messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
|
| 93 |
|
| 94 |
-
# 处理 tools
|
| 95 |
if req.tools:
|
| 96 |
tools_json = json.dumps(req.tools, ensure_ascii=False)
|
| 97 |
-
tool_prompt =
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
| 99 |
system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
|
| 100 |
if system_index is not None:
|
| 101 |
messages[system_index]["content"] += "\n\n" + tool_prompt
|
|
@@ -110,10 +130,10 @@ async def chat_completions(req: ChatRequest):
|
|
| 110 |
max_tokens=req.max_tokens,
|
| 111 |
stream=True,
|
| 112 |
)
|
|
|
|
| 113 |
async def generate():
|
| 114 |
chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
|
| 115 |
for chunk in stream:
|
| 116 |
-
# 适配 OpenAI 流式格式
|
| 117 |
if chunk.choices:
|
| 118 |
delta = chunk.choices[0].delta
|
| 119 |
finish_reason = chunk.choices[0].finish_reason
|
|
@@ -145,4 +165,4 @@ async def chat_completions(req: ChatRequest):
|
|
| 145 |
|
| 146 |
@app.get("/")
|
| 147 |
async def root():
|
| 148 |
-
return {"status": "running", "model":
|
|
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
# ====================== 模型配置 ======================
|
| 18 |
+
# 使用 Hugging Face 上的 GGUF 模型(4B Q4_K_M 版本)
|
|
|
|
| 19 |
REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
|
| 20 |
FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
|
| 21 |
+
MODEL_ID = "qwen3.5-4b" # CoPaw 中配置的模型名称
|
| 22 |
|
| 23 |
+
# 加载模型(自动从 HF 下载并缓存)
|
| 24 |
logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
|
| 25 |
llm = Llama.from_pretrained(
|
| 26 |
repo_id=REPO_ID,
|
| 27 |
filename=FILENAME,
|
| 28 |
+
n_ctx=4096, # 上下文窗口,可根据需求调整
|
| 29 |
n_threads=None, # 自动使用所有 CPU 线程
|
| 30 |
verbose=False,
|
| 31 |
)
|
| 32 |
logger.info("模型加载完成!")
|
| 33 |
|
| 34 |
+
app = FastAPI(title="Qwen3.5-4B GGUF API (CoPaw兼容)")
|
| 35 |
|
| 36 |
# ====================== CORS 中间件 ======================
|
| 37 |
app.add_middleware(
|
|
|
|
| 49 |
|
| 50 |
@app.get("/v1/me")
async def get_me():
    """Return the fixed profile of the local user.

    The payload is static: an id, a display name, an e-mail address and an
    ``is_admin`` flag that is always ``True``. No request data is read.
    """
    profile = {"id": "local-user", "name": "Local User"}
    profile["email"] = "user@localhost"
    profile["is_admin"] = True
    return profile
|
| 58 |
|
| 59 |
@app.get("/v1/dashboard/bots")
|
| 60 |
async def get_bots():
|
|
|
|
| 62 |
|
| 63 |
@app.get("/v1/models")
async def list_models():
    """List the single model exposed by this server.

    Returns a dict with ``"object": "list"`` and a one-element ``"data"``
    array. The entry carries ``MODEL_ID`` as its id, a hard-coded ``created``
    integer (presumably a Unix timestamp -- confirm) and the owner ``"user"``.
    """
    model_entry = {
        "id": MODEL_ID,
        "object": "model",
        "created": 1773000000,
        "owned_by": "user",
    }
    return {"object": "list", "data": [model_entry]}
|
| 76 |
|
| 77 |
# ====================== 请求/响应数据模型 ======================
|
| 78 |
class Message(BaseModel):
|
|
|
|
| 95 |
if isinstance(content, str):
|
| 96 |
return content
|
| 97 |
if isinstance(content, list):
|
| 98 |
+
texts = []
|
| 99 |
+
for part in content:
|
| 100 |
+
if isinstance(part, dict) and part.get("type") == "text":
|
| 101 |
+
texts.append(part.get("text", ""))
|
| 102 |
return "\n".join(texts)
|
| 103 |
return str(content)
|
| 104 |
|
|
|
|
| 108 |
# 转换消息格式
|
| 109 |
messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
|
| 110 |
|
| 111 |
+
# 处理 tools:将工具描述合并到 system 消息中
|
| 112 |
if req.tools:
|
| 113 |
tools_json = json.dumps(req.tools, ensure_ascii=False)
|
| 114 |
+
tool_prompt = (
|
| 115 |
+
f"你是一个助手,可以使用以下工具:\n{tools_json}\n"
|
| 116 |
+
f"当用户的问题需要调用工具时,请输出 <tool_call>{{...}}</tool_call> 格式的 JSON。"
|
| 117 |
+
)
|
| 118 |
+
# 查找现有 system 消息,有则合并,否则创建
|
| 119 |
system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
|
| 120 |
if system_index is not None:
|
| 121 |
messages[system_index]["content"] += "\n\n" + tool_prompt
|
|
|
|
| 130 |
max_tokens=req.max_tokens,
|
| 131 |
stream=True,
|
| 132 |
)
|
| 133 |
+
|
| 134 |
async def generate():
|
| 135 |
chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
|
| 136 |
for chunk in stream:
|
|
|
|
| 137 |
if chunk.choices:
|
| 138 |
delta = chunk.choices[0].delta
|
| 139 |
finish_reason = chunk.choices[0].finish_reason
|
|
|
|
| 165 |
|
| 166 |
@app.get("/")
async def root():
    """Report that the service is running and name the model file in use.

    The ``model`` field is ``REPO_ID`` and ``FILENAME`` joined by a slash.
    """
    model_path = f"{REPO_ID}/{FILENAME}"
    return {"status": "running", "model": model_path}
|