nagose committed on
Commit
14e8f86
·
verified ·
1 Parent(s): f898a57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -14
app.py CHANGED
@@ -15,24 +15,23 @@ logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
17
  # ====================== 模型配置 ======================
18
- # 使用 Hugging Face 的 repo ID GGUF 文件名
19
- # 这里以 4B 模型的 Q4_K_M 版本为例,你可以根据需要修改
20
  REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
21
  FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
22
- MODEL_ID = "qwen3.5-4b" # 自定义模型 ID
23
 
24
- # 加载模型 (CPU)
25
  logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
26
  llm = Llama.from_pretrained(
27
  repo_id=REPO_ID,
28
  filename=FILENAME,
29
- n_ctx=4096, # 上下文窗口,可根据需调整
30
  n_threads=None, # 自动使用所有 CPU 线程
31
  verbose=False,
32
  )
33
  logger.info("模型加载完成!")
34
 
35
- app = FastAPI(title="Qwen3.5 GGUF API (CoPaw兼容)")
36
 
37
  # ====================== CORS 中间件 ======================
38
  app.add_middleware(
@@ -50,7 +49,12 @@ async def health():
50
 
51
  @app.get("/v1/me")
52
  async def get_me():
53
- return {"id": "local-user", "name": "Local User", "email": "user@localhost", "is_admin": True}
 
 
 
 
 
54
 
55
  @app.get("/v1/dashboard/bots")
56
  async def get_bots():
@@ -58,7 +62,17 @@ async def get_bots():
58
 
59
  @app.get("/v1/models")
60
  async def list_models():
61
- return {"object": "list", "data": [{"id": MODEL_ID, "object": "model", "created": 1773000000, "owned_by": "user"}]}
 
 
 
 
 
 
 
 
 
 
62
 
63
  # ====================== 请求/响应数据模型 ======================
64
  class Message(BaseModel):
@@ -81,7 +95,10 @@ def convert_content_to_str(content: Optional[Union[str, List[Dict[str, Any]]]])
81
  if isinstance(content, str):
82
  return content
83
  if isinstance(content, list):
84
- texts = [part.get("text", "") for part in content if isinstance(part, dict) and part.get("type") == "text"]
 
 
 
85
  return "\n".join(texts)
86
  return str(content)
87
 
@@ -91,11 +108,14 @@ async def chat_completions(req: ChatRequest):
91
  # 转换消息格式
92
  messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
93
 
94
- # 处理 tools (简单示例,tools 添加到系统提示)
95
  if req.tools:
96
  tools_json = json.dumps(req.tools, ensure_ascii=False)
97
- tool_prompt = f"你是一个助手,可以使用以下工具:{tools_json}\n当需要调用工具时,请输出 <tool_call>{{...}}</tool_call>。"
98
- # 检查否有 system 消息有则合并,否则创建
 
 
 
99
  system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
100
  if system_index is not None:
101
  messages[system_index]["content"] += "\n\n" + tool_prompt
@@ -110,10 +130,10 @@ async def chat_completions(req: ChatRequest):
110
  max_tokens=req.max_tokens,
111
  stream=True,
112
  )
 
113
  async def generate():
114
  chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
115
  for chunk in stream:
116
- # 适配 OpenAI 流式格式
117
  if chunk.choices:
118
  delta = chunk.choices[0].delta
119
  finish_reason = chunk.choices[0].finish_reason
@@ -145,4 +165,4 @@ async def chat_completions(req: ChatRequest):
145
 
146
  @app.get("/")
147
  async def root():
148
- return {"status": "running", "model": REPO_ID + "/" + FILENAME}
 
logger = logging.getLogger(__name__)

# ====================== Model configuration ======================
# Hugging Face GGUF model (4B parameters, Q4_K_M quantization).
REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
MODEL_ID = "qwen3.5-4b"  # model name as configured on the CoPaw side

# Load the model (downloaded from Hugging Face and cached automatically;
# runs at import time, so startup blocks until the load completes).
logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
llm = Llama.from_pretrained(
    repo_id=REPO_ID,
    filename=FILENAME,
    n_ctx=4096,      # context window size; adjust as needed
    n_threads=None,  # None = use all available CPU threads automatically
    verbose=False,
)
logger.info("模型加载完成!")

app = FastAPI(title="Qwen3.5-4B GGUF API (CoPaw兼容)")
35
 
36
  # ====================== CORS 中间件 ======================
37
  app.add_middleware(
 
49
 
50
@app.get("/v1/me")
async def get_me():
    """Return a stub local-user profile so CoPaw's auth check succeeds."""
    profile = {"id": "local-user", "name": "Local User"}
    profile["email"] = "user@localhost"
    profile["is_admin"] = True
    return profile
58
 
59
  @app.get("/v1/dashboard/bots")
60
  async def get_bots():
 
62
 
63
@app.get("/v1/models")
async def list_models():
    """List the single locally served model in OpenAI /v1/models format."""
    model_entry = {
        "id": MODEL_ID,
        "object": "model",
        "created": 1773000000,
        "owned_by": "user",
    }
    return {"object": "list", "data": [model_entry]}
76
 
77
  # ====================== 请求/响应数据模型 ======================
78
  class Message(BaseModel):
 
95
  if isinstance(content, str):
96
  return content
97
  if isinstance(content, list):
98
+ texts = []
99
+ for part in content:
100
+ if isinstance(part, dict) and part.get("type") == "text":
101
+ texts.append(part.get("text", ""))
102
  return "\n".join(texts)
103
  return str(content)
104
 
 
108
  # 转换消息格式
109
  messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
110
 
111
+ # 处理 tools工具描述合并到 system
112
  if req.tools:
113
  tools_json = json.dumps(req.tools, ensure_ascii=False)
114
+ tool_prompt = (
115
+ f"你一个助手可以使用以下工具:\n{tools_json}\n"
116
+ f"当用户的问题需要调用工具时,请输出 <tool_call>{{...}}</tool_call> 格式的 JSON。"
117
+ )
118
+ # 查找现有 system 消息,有则合并,否则创建
119
  system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
120
  if system_index is not None:
121
  messages[system_index]["content"] += "\n\n" + tool_prompt
 
130
  max_tokens=req.max_tokens,
131
  stream=True,
132
  )
133
+
134
  async def generate():
135
  chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
136
  for chunk in stream:
 
137
  if chunk.choices:
138
  delta = chunk.choices[0].delta
139
  finish_reason = chunk.choices[0].finish_reason
 
165
 
166
@app.get("/")
async def root():
    """Liveness/info endpoint: report status and the loaded model reference."""
    model_ref = f"{REPO_ID}/{FILENAME}"
    return {"status": "running", "model": model_ref}