Spaces:

nagose
/

lh4b

Build error

App Files Community

nagose committed on Mar 15

Commit

14e8f86

verified ·

1 Parent(s): f898a57

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -14

app.py CHANGED Viewed

@@ -15,24 +15,23 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # ====================== 模型配置 ======================
-# 使用 Hugging Face 的 repo ID 和 GGUF 文件名
-# 这里以 4B 模型的 Q4_K_M 版本为例，你可以根据需要修改
 REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
 FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
-MODEL_ID = "qwen3.5-4b"  # 自定义模型 ID
-# 加载模型 (CPU)
 logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
 llm = Llama.from_pretrained(
     repo_id=REPO_ID,
     filename=FILENAME,
-    n_ctx=4096,          # 上下文窗口，可根据需要调整
     n_threads=None,       # 自动使用所有 CPU 线程
     verbose=False,
 )
 logger.info("模型加载完成！")
-app = FastAPI(title="Qwen3.5 GGUF API (CoPaw兼容)")
 # ====================== CORS 中间件 ======================
 app.add_middleware(
@@ -50,7 +49,12 @@ async def health():
 @app.get("/v1/me")
 async def get_me():
-    return {"id": "local-user", "name": "Local User", "email": "user@localhost", "is_admin": True}
 @app.get("/v1/dashboard/bots")
 async def get_bots():
@@ -58,7 +62,17 @@ async def get_bots():
 @app.get("/v1/models")
 async def list_models():
-    return {"object": "list", "data": [{"id": MODEL_ID, "object": "model", "created": 1773000000, "owned_by": "user"}]}
 # ====================== 请求/响应数据模型 ======================
 class Message(BaseModel):
@@ -81,7 +95,10 @@ def convert_content_to_str(content: Optional[Union[str, List[Dict[str, Any]]]])
     if isinstance(content, str):
         return content
     if isinstance(content, list):
-        texts = [part.get("text", "") for part in content if isinstance(part, dict) and part.get("type") == "text"]
         return "\n".join(texts)
     return str(content)
@@ -91,11 +108,14 @@ async def chat_completions(req: ChatRequest):
     # 转换消息格式
     messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
-    # 处理 tools (简单示例，将 tools 信息添加到系统提示)
     if req.tools:
         tools_json = json.dumps(req.tools, ensure_ascii=False)
-        tool_prompt = f"你是一个助手，可以使用以下工具：{tools_json}\n当需要调用工具时，请输出 <tool_call>{{...}}</tool_call>。"
-        # 检查是否有 system 消息，有则合并，否则创建
         system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
         if system_index is not None:
             messages[system_index]["content"] += "\n\n" + tool_prompt
@@ -110,10 +130,10 @@ async def chat_completions(req: ChatRequest):
             max_tokens=req.max_tokens,
             stream=True,
         )
         async def generate():
             chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
             for chunk in stream:
-                # 适配 OpenAI 流式格式
                 if chunk.choices:
                     delta = chunk.choices[0].delta
                     finish_reason = chunk.choices[0].finish_reason
@@ -145,4 +165,4 @@ async def chat_completions(req: ChatRequest):
 @app.get("/")
 async def root():
-    return {"status": "running", "model": REPO_ID + "/" + FILENAME}

 logger = logging.getLogger(__name__)
 # ====================== 模型配置 ======================
+# 使用 Hugging Face 上的 GGUF 模型（4B Q4_K_M 版本）
 REPO_ID = "lmstudio-community/Qwen3.5-4B-GGUF"
 FILENAME = "Qwen3.5-4B-Q4_K_M.gguf"
+MODEL_ID = "qwen3.5-4b"  # CoPaw 中配置的模型名称
+# 加载模型（自动从 HF 下载并缓存）
 logger.info(f"正在从 {REPO_ID} 加载模型 {FILENAME}...")
 llm = Llama.from_pretrained(
     repo_id=REPO_ID,
     filename=FILENAME,
+    n_ctx=4096,          # 上下文窗口，可根据需求调整
     n_threads=None,       # 自动使用所有 CPU 线程
     verbose=False,
 )
 logger.info("模型加载完成！")
+app = FastAPI(title="Qwen3.5-4B GGUF API (CoPaw兼容)")
 # ====================== CORS 中间件 ======================
 app.add_middleware(
 @app.get("/v1/me")
 async def get_me():
+    return {
+        "id": "local-user",
+        "name": "Local User",
+        "email": "user@localhost",
+        "is_admin": True
+    }
 @app.get("/v1/dashboard/bots")
 async def get_bots():
 @app.get("/v1/models")
 async def list_models():
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": MODEL_ID,
+                "object": "model",
+                "created": 1773000000,
+                "owned_by": "user"
+            }
+        ]
+    }
 # ====================== 请求/响应数据模型 ======================
 class Message(BaseModel):
     if isinstance(content, str):
         return content
     if isinstance(content, list):
+        texts = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                texts.append(part.get("text", ""))
         return "\n".join(texts)
     return str(content)
     # 转换消息格式
     messages = [{"role": m.role, "content": convert_content_to_str(m.content)} for m in req.messages]
+    # 处理 tools：将工具描述合并到 system 消息中
     if req.tools:
         tools_json = json.dumps(req.tools, ensure_ascii=False)
+        tool_prompt = (
+            f"你是一个助手，可以使用以下工具：\n{tools_json}\n"
+            f"当用户的问题需要调用工具时，请输出 <tool_call>{{...}}</tool_call> 格式的 JSON。"
+        )
+        # 查找现有 system 消息，有则合并，否则创建
         system_index = next((i for i, m in enumerate(messages) if m["role"] == "system"), None)
         if system_index is not None:
             messages[system_index]["content"] += "\n\n" + tool_prompt
             max_tokens=req.max_tokens,
             stream=True,
         )
         async def generate():
             chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
             for chunk in stream:
                 if chunk.choices:
                     delta = chunk.choices[0].delta
                     finish_reason = chunk.choices[0].finish_reason
 @app.get("/")
 async def root():
+    return {"status": "running", "model": f"{REPO_ID}/{FILENAME}"}