wdawdwa commited on
Commit
9f2b1d2
·
1 Parent(s): 519d149

Deploy Qwen Local API to Hugging Face

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.10 slim image as the base environment
FROM python:3.10-slim

# Set the working directory
WORKDIR /app

# Copy the entire project tree (the local app/ package, requirements.txt, etc.)
# into the container's /app directory in one step, preserving the structure.
COPY . /app

# Environment variable telling Python where to search for modules
ENV PYTHONPATH=/app

# Install all Python dependencies (Tsinghua mirror used to speed up installs in CN networks)
RUN pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt

# Command executed when the container starts
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8082"]
README.md CHANGED
@@ -1,12 +1,8 @@
1
  ---
2
- title: GPT3.5
3
- emoji: 💻
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: docker
7
- pinned: false
8
- license: other
9
- short_description: GPT3.5 free API
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Qwen Local API
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ app_port: 8082
 
 
8
  ---
 
 
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app/core/config.py (v7.2 final fix)

from pydantic_settings import BaseSettings
# NOTE: Optional must be imported here -- a missing import previously crashed startup.
from typing import Dict, List, Optional

class Settings(BaseSettings):
    """
    Application configuration (v7.2 final fix).

    Values are loaded from environment variables and the `.env` file
    (see the nested `Config` class). All fields ship with defaults so
    the app can boot even with an empty environment.
    """
    # --- Port the API service listens on ---
    LISTEN_PORT: int = 8082

    # --- Application metadata ---
    APP_NAME: str = "Qwen Multi-Account Local API"
    APP_VERSION: str = "7.2.0"
    # User-facing description string (runtime data; intentionally left in Chinese).
    DESCRIPTION: str = "一个支持根据模型名称动态切换账号并具备密钥认证功能的高性能通义千问本地代理。"

    # --- Authentication / security ---
    # Master API key; when None, request authentication is skipped entirely.
    API_MASTER_KEY: Optional[str] = None

    # --- Model name -> account id mapping (unmapped models fall back to account 1 in the provider) ---
    MODEL_TO_ACCOUNT_MAP: Dict[str, int] = {
        "Qwen3-Max-Preview": 2
    }

    # --- Advertised model list (informational only; actual support depends on the account) ---
    SUPPORTED_MODELS: List[str] = [
        "qwen-plus", "qwen-turbo", "qwen-max", "qwen-long", "qwen-vl-plus",
        "Qwen3-Max-Preview",
    ]

    # --- CN-site account 1 (default) ---
    CN_ACCOUNT_1_COOKIE: str = ""
    CN_ACCOUNT_1_XSRF_TOKEN: str = ""

    # --- CN-site account 2 (dedicated) ---
    CN_ACCOUNT_2_COOKIE: str = ""
    CN_ACCOUNT_2_XSRF_TOKEN: str = ""

    # --- International-site account (optional) ---
    INTL_COOKIE: str = ""
    INTL_AUTHORIZATION: str = ""
    INTL_BX_UA: str = ""

    class Config:
        # Load settings from a local .env file, decoded as UTF-8.
        env_file = ".env"
        env_file_encoding = 'utf-8'

# Module-level singleton imported by the rest of the application.
settings = Settings()
app/providers/__init__.py ADDED
File without changes
app/providers/base.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod
from typing import Dict, Any, AsyncGenerator, Union
from fastapi import Request

class BaseProvider(ABC):
    """Abstract base class that all providers must implement."""

    @abstractmethod
    async def chat_completion(
        self,
        request_data: Dict[str, Any],
        original_request: Request
    ) -> Union[Dict[str, Any], AsyncGenerator[str, None]]:
        """Handle one chat-completion request (core entry point).

        Args:
            request_data: Parsed JSON body of the incoming request.
            original_request: The raw FastAPI request object.

        Returns:
            Either a complete response dict, or an async generator of
            SSE-formatted string chunks for streaming responses.
        """
        pass
app/providers/text_provider.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app/providers/text_provider.py (v11.0 - robust cumulative-to-incremental stream conversion)

import httpx
import json
import uuid
import time
import traceback
import asyncio
from typing import Dict, Any, AsyncGenerator, Union, List

from fastapi import Request
from fastapi.responses import StreamingResponse, JSONResponse

from app.providers.base import BaseProvider
from app.core.config import settings

import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class TextProvider(BaseProvider):
    """
    All-in-one Tongyi Qianwen provider (v11.0).

    - Uses a stateful parser that converts the official site's
      "cumulative" SSE stream (each event carries the full text so far)
      into the incremental delta stream expected by OpenAI-compatible
      clients, eliminating duplicated output in all scenarios.
    - Keeps session pre-warming and the international-site (INTL)
      long-polling path for image/video generation tasks.
    """

    # --------------------------------------------------------------------------
    # Entry point
    # --------------------------------------------------------------------------
    async def chat_completion(self, request_data: Dict[str, Any], original_request: Request) -> Union[StreamingResponse, JSONResponse]:
        # Route by task type: image/video go through the INTL long-polling
        # path; everything else streams from the CN site.
        model_name = request_data.get("model", "qwen-plus")
        task_type = self._get_task_type(model_name, request_data)
        try:
            if task_type in ["image", "video"]:
                logger.info(f"检测到 '{task_type}' 任务,强制使用国际站(INTL)模式...")
                return await self._handle_long_polling_task(request_data)
            else:
                # Models not present in the map fall back to CN account 1.
                account_id = settings.MODEL_TO_ACCOUNT_MAP.get(model_name, 1)
                logger.info(f"检测到模型 '{model_name}',任务类型 '{task_type}',将使用国内站账号 {account_id}...")
                return await self._handle_stream_task(request_data, account_id)
        except Exception as e:
            # Any provider failure is reported as an OpenAI-style error payload.
            logger.error(f"处理任务时出错: {type(e).__name__}: {e}")
            traceback.print_exc()
            return JSONResponse(content={"error": {"message": f"处理任务时出错: {e}", "type": "provider_error"}}, status_code=500)

    # --------------------------------------------------------------------------
    # CN-site streaming task handling
    # --------------------------------------------------------------------------
    async def _handle_stream_task(self, request_data: Dict[str, Any], account_id: int) -> StreamingResponse:
        """Build headers/payload for the chosen CN account and return an SSE stream."""
        headers = self._prepare_cn_headers(account_id)
        # Best-effort pre-warm: failures are logged but do not abort the request.
        await self._prewarm_session(headers)
        payload = self._prepare_cn_payload(request_data)
        model_name_for_client = request_data.get("model", "qwen-plus")
        url = "https://api.tongyi.com/dialog/conversation"
        logger.info(f" [CN-Account-{account_id}] 正在向模型 '{model_name_for_client}' 发送流式请求...")
        return StreamingResponse(self._stream_generator(url, headers, payload, model_name_for_client), media_type="text/event-stream")

    async def _prewarm_session(self, headers: Dict[str, Any]):
        """Fire a harmless upload-history query to warm the upstream session.

        Failures only produce a warning -- the main request proceeds regardless.
        """
        try:
            logger.info(" [Pre-warm] 正在发送会话预热请求...")
            url = "https://api.tongyi.com/assistant/api/record/list"
            payload = {
                "pageNo": 1, "terminal": "web", "pageSize": 10000, "module": "uploadhistory",
                "fileTypes": ["file", "audio", "video"], "recordSources": ["chat", "zhiwen", "tingwu"],
                "status": [20, 30, 40, 41], "taskTypes": ["local", "net_source", "doc_read", "paper_read", "book_read"]
            }
            async with httpx.AsyncClient() as client:
                # The pre-warm endpoint returns JSON, unlike the SSE chat endpoint.
                prewarm_headers = headers.copy()
                prewarm_headers['Accept'] = 'application/json, text/plain, */*'
                response = await client.post(url, headers=prewarm_headers, json=payload, timeout=10)
                response.raise_for_status()
                logger.info(" [Pre-warm] ✅ 会话预热成功!")
        except Exception as e:
            logger.warning(f" [Pre-warm] ⚠️ 会话预热失败: {e}。继续尝试...")

    # --------------------------------------------------------------------------
    # Advanced stream parser (v11.0) -- core of this module
    # --------------------------------------------------------------------------
    async def _stream_generator(self, url: str, headers: Dict, payload: Dict, model_name: str) -> AsyncGenerator[str, None]:
        """
        Robust stateful stream generator: converts Tongyi Qianwen's
        "cumulative" SSE stream into a standard OpenAI incremental stream.
        """
        chat_id = f"chatcmpl-{uuid.uuid4().hex}"
        is_first_chunk = True
        # Key (1): state variable lives at generator scope so it persists
        # across loop iterations -- it holds everything emitted so far.
        full_content_so_far = ""

        try:
            async with httpx.AsyncClient(timeout=60) as client:
                async with client.stream("POST", url, headers=headers, json=payload) as response:
                    response.raise_for_status()

                    async for line in response.aiter_lines():
                        # Only SSE data lines carry payloads.
                        if not line.startswith('data:'):
                            continue

                        raw_data_str = line.strip()[len('data:'):]
                        if not raw_data_str or "[DONE]" in raw_data_str:
                            continue

                        try:
                            qwen_data = json.loads(raw_data_str)

                            # Keep only 'text'-typed content blocks.
                            text_blocks = [block for block in qwen_data.get("contents", []) if block.get("contentType") == "text"]
                            if not text_blocks:
                                continue

                            # The last text block carries the newest full content.
                            latest_text_block = text_blocks[-1]
                            new_full_content = latest_text_block.get("content", "")

                            if new_full_content is None:
                                continue

                            # Key (2): compute the exact delta from the persisted state.
                            delta_content = ""
                            if new_full_content.startswith(full_content_so_far):
                                delta_content = new_full_content[len(full_content_so_far):]
                            else:
                                # Upstream restarted the cumulative text; resend it whole.
                                logger.warning(f" [Stream Reset] 流内容不连续,将发送全部新内容。")
                                delta_content = new_full_content

                            # Skip events that add nothing new.
                            if not delta_content:
                                continue

                            # Key (3): emit the role chunk exactly once, before any content.
                            if is_first_chunk:
                                role_chunk = {
                                    "id": chat_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
                                    "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}]
                                }
                                yield f"data: {json.dumps(role_chunk, ensure_ascii=False)}\n\n"
                                is_first_chunk = False

                            # Emit the actual incremental content chunk.
                            openai_chunk = {
                                "id": chat_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
                                "choices": [{"index": 0, "delta": {"content": delta_content}, "finish_reason": None}]
                            }
                            yield f"data: {json.dumps(openai_chunk, ensure_ascii=False)}\n\n"

                            # Key (4): update the state for the next data: event.
                            full_content_so_far = new_full_content

                        except json.JSONDecodeError:
                            logger.warning(f" [Warning] JSON 解析失败: {raw_data_str}")
                            continue

            # Stream finished: emit the terminating chunk with finish_reason "stop".
            final_chunk = {
                "id": chat_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model_name,
                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
            }
            yield f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"

        except Exception as e:
            # NOTE(review): errors here are only logged; the client sees an
            # abruptly terminated stream followed by [DONE] -- confirm intended.
            logger.error(f" [Error] 流式生成器发生错误: {e}")
            traceback.print_exc()

        finally:
            # Always close the SSE stream, even on error paths.
            logger.info(" [Stream] 流式传输结束。")
            yield "data: [DONE]\n\n"

    # --------------------------------------------------------------------------
    # Helpers
    # --------------------------------------------------------------------------
    def _get_task_type(self, model_name: str, request_data: Dict[str, Any]) -> str:
        """Classify the request by model name: image / video / vision / text."""
        model_name_lower = model_name.lower()
        if "wanx" in model_name_lower: return "image"
        if "animate" in model_name_lower: return "video"
        if "vl" in model_name_lower or "qvq" in model_name_lower: return "vision"
        return "text"

    def _prepare_cn_headers(self, account_id: int) -> Dict[str, str]:
        """Build CN-site request headers from the configured account credentials.

        Raises:
            ValueError: if the account's settings fields are missing or empty.
        """
        try:
            cookie = getattr(settings, f"CN_ACCOUNT_{account_id}_COOKIE")
            xsrf_token = getattr(settings, f"CN_ACCOUNT_{account_id}_XSRF_TOKEN")
        except AttributeError: raise ValueError(f"国内站账号 {account_id} 的配置不完整。")
        if not cookie or not xsrf_token: raise ValueError(f"国内站账号 {account_id} 的认证信息为空。")
        # Re-encode the cookie so non-latin-1 characters survive httpx header encoding.
        safe_cookie = cookie.encode('utf-8').decode('latin-1')
        return {'Origin': 'https://www.tongyi.com', 'Referer': 'https://www.tongyi.com/', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36', 'Cookie': safe_cookie, 'x-xsrf-token': xsrf_token, 'x-platform': 'pc_tongyi', 'Accept': 'text/event-stream', 'Content-Type': 'application/json;charset=UTF-8'}

    def _prepare_cn_payload(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Translate an OpenAI-style request body into the CN-site conversation payload.

        Only string message contents are forwarded; non-string (multimodal)
        contents are silently dropped here.
        """
        messages: List[Dict[str, Any]] = request_data.get("messages", [])
        if not messages: messages = [{"role": "user", "content": request_data.get("prompt", "你好")}]
        qwen_contents = []
        for msg in messages:
            content = msg.get("content")
            if isinstance(content, str): qwen_contents.append({"role": msg.get("role"), "content": content, "contentType": "text"})
        model_in_payload = request_data.get("model", "")
        return {"action": "next", "contents": qwen_contents, "model": model_in_payload, "parentMsgId": "", "requestId": str(uuid.uuid4()), "sessionId": "", "sessionType": "text_chat", "userAction": "new_top", "feature_config": {"search_enabled": False, "thinking_enabled": False}}

    # --------------------------------------------------------------------------
    # International-site (INTL) functions
    # --------------------------------------------------------------------------
    def _prepare_intl_headers(self) -> Dict[str, str]:
        """Build INTL-site headers; raises ValueError if credentials are incomplete."""
        if not settings.INTL_AUTHORIZATION or not settings.INTL_COOKIE or not settings.INTL_BX_UA:
            raise ValueError("国际站(intl)认证信息不完整,请检查.env文件。")
        # Same latin-1 re-encoding trick as the CN headers.
        safe_cookie = settings.INTL_COOKIE.encode('utf-8').decode('latin-1')
        return {'Origin': 'https://chat.qwen.ai', 'Referer': 'https://chat.qwen.ai/', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36', 'Authorization': settings.INTL_AUTHORIZATION, 'Cookie': safe_cookie, 'bx-ua': settings.INTL_BX_UA}

    async def _handle_long_polling_task(self, request_data: Dict[str, Any]) -> JSONResponse:
        """Start an image/video generation task on the INTL site and poll it to completion.

        Polls every 3 seconds for up to 120 attempts (~6 minutes).

        Raises:
            ValueError: if no task id could be extracted from the start response.
            RuntimeError: if the upstream task reports "failed".
            TimeoutError: if the task does not finish within the polling window.
        """
        headers = self._prepare_intl_headers()
        headers['Accept'] = 'application/json, text/event-stream'
        headers['Content-Type'] = 'application/json;charset=UTF-8'
        completions_url = "https://chat.qwen.ai/api/v2/chat/completions"
        task_status_url_template = "https://chat.qwen.ai/api/v1/tasks/status/{task_id}"
        prompt = request_data.get("prompt", "一只猫")
        # "wanx" models -> text-to-image; everything else -> text-to-video.
        model_name = "wanx-v1" if "wanx" in request_data.get("model", "") else "animate-v1"
        msg_type = "t2i" if model_name == "wanx-v1" else "t2v"
        payload = {"action": "next", "contents": [{"content": prompt, "contentType": "text", "role": "user"}], "msg_type": msg_type, "mode": "chat", "model": model_name, "parentMsgId": "", "requestId": str(uuid.uuid4())}
        async with httpx.AsyncClient(timeout=60) as client:
            logger.info(f" [INTL] 正在启动 '{model_name}' 任务...")
            response = await client.post(completions_url, headers=headers, json=payload)
            response.raise_for_status()
            task_id = None
            # The start endpoint answers with SSE lines; the first "taskIds"
            # entry identifies the asynchronous generation job.
            async for line in response.aiter_lines():
                if line.startswith('data:'):
                    try:
                        data = json.loads(line[len('data:'):])
                        if data.get("taskIds"):
                            task_id = data["taskIds"][0]
                            break
                    except json.JSONDecodeError: continue
            if not task_id: raise ValueError(f"{model_name} 任务启动失败。")
            logger.info(f" [INTL] 成功获取任务 ID: {task_id}")
            for i in range(120):
                await asyncio.sleep(3)
                status_url = task_status_url_template.format(task_id=task_id)
                status_response = await client.get(status_url, headers=headers)
                # Non-200 status responses are silently retried on the next tick.
                if status_response.status_code == 200:
                    data = status_response.json()
                    if data.get("status") == "succeeded":
                        logger.info(f" [INTL] {model_name} 任务成功!")
                        return self._format_media_response(data, request_data, model_name)
                    if data.get("status") == "failed":
                        raise RuntimeError(f"任务失败: {data.get('result', '未知错误')}")
            raise TimeoutError("任务超时。")

    def _format_media_response(self, task_result: Dict[str, Any], request_data: Dict[str, Any], task_type: str) -> JSONResponse:
        """Wrap generated image/video URLs in an OpenAI chat.completion response."""
        model_name = request_data.get("model")
        items = task_result.get("result", {}).get("images" if "wanx" in task_type else "videos", [])
        urls = [item.get("url") for item in items if item.get("url")]
        # NOTE(review): "!image(url)" is not standard Markdown image syntax
        # ("![alt](url)") -- confirm the consuming client renders this form.
        content = "\n".join(f"!image({url})" for url in urls) if "wanx" in task_type else "\n".join(f"视频链接: {url}" for url in urls)
        response_data = {"id": f"chatcmpl-{uuid.uuid4().hex}", "object": "chat.completion", "created": int(time.time()), "model": model_name, "choices": [{"index": 0, "message": {"role": "assistant", "content": content or "生成完成,但未能获取链接。"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}}
        return JSONResponse(content=response_data)
docker-compose.yml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# qwen-local/docker-compose.yml (architecture-corrected version)

services:
  # Front-desk gateway service
  nginx:
    image: nginx:latest
    ports:
      - "8082:80"
    volumes:
      # Key fix: override the MAIN nginx config file directly,
      # rather than mounting it as an included sub-config.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - qwen-local
    networks:
      - shared_network

  # Worker service
  qwen-local:
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    # The worker does not expose any ports to the host directly
    env_file:
      - .env
    environment:
      - API_MASTER_KEY=1
      - HTTP_PROXY=http://host.docker.internal:7890
      - HTTPS_PROXY=http://host.docker.internal:7890
      - NO_PROXY=localhost,127.0.0.1
    extra_hosts:
      - "host.docker.internal:host-gateway"
    networks:
      - shared_network

networks:
  shared_network:
    external: true
main.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py (v7.1 密钥认证版)
2
+
3
+ import traceback
4
+ from typing import Optional
5
+
6
+ from fastapi import FastAPI, Request, HTTPException, Depends, Header
7
+
8
+ from app.core.config import settings
9
+ from app.providers.text_provider import TextProvider
10
+
11
# FastAPI application, described by the shared settings singleton.
app = FastAPI(
    title=settings.APP_NAME,
    version=settings.APP_VERSION,
    description=settings.DESCRIPTION
)

# A single all-purpose provider instance handles every chat request.
text_provider = TextProvider()
19
+
20
+
21
+ # --- 认证依赖项 ---
22
async def verify_api_key(authorization: Optional[str] = Header(None)):
    """
    Dependency that validates the API key on protected routes.

    If API_MASTER_KEY is configured, the request must carry an
    `Authorization: Bearer <key>` header with the matching key;
    otherwise authentication is skipped entirely.

    Raises:
        HTTPException: 401 when the header is missing or malformed,
            403 when the presented key does not match.
    """
    import hmac  # stdlib; local import keeps the auth helper self-contained

    # Skip authentication when API_MASTER_KEY is unset in .env /
    # docker-compose.yml -- the service is then open to all requests.
    if not settings.API_MASTER_KEY:
        print("警告:未配置 API_MASTER_KEY,服务将对所有请求开放。")
        return

    # A configured key makes the Authorization header mandatory.
    if authorization is None:
        raise HTTPException(
            status_code=401,
            detail="Unauthorized: Missing Authorization header.",
        )

    # Validate the "Bearer <token>" authentication scheme.
    try:
        scheme, token = authorization.split()
        if scheme.lower() != "bearer":
            raise ValueError("Invalid scheme")
    except ValueError:
        raise HTTPException(
            status_code=401,
            detail="Invalid authentication scheme. Use 'Bearer <your_api_key>'.",
        )

    # Core check: constant-time comparison against the master key.
    # (A plain `!=` would leak key length/prefix information via timing.)
    if not hmac.compare_digest(token, settings.API_MASTER_KEY):
        raise HTTPException(
            status_code=403,
            detail="Forbidden: Invalid API Key.",
        )
    # Authentication passed; the request proceeds to the route handler.
+
58
+
59
@app.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
async def chat_completions(request: Request):
    """
    Catch-all chat route: after verify_api_key authentication succeeds,
    every request is delegated to the single TextProvider instance.
    """
    try:
        payload = await request.json()
        print("接收到请求,认证通过,路由到全能的 TextProvider...")
        result = await text_provider.chat_completion(payload, request)
    except Exception as e:
        # Surface any unexpected failure as a 500 with the original message.
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"主路由发生内部服务器错误: {str(e)}")
    return result
72
+
73
+
74
@app.get("/")
def root():
    """Root route: basic service information, no authentication required."""
    info = {
        "message": f"Welcome to {settings.APP_NAME}",
        "version": settings.APP_VERSION,
    }
    return info
nginx.conf ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# =================================================================
# Qwen Local API - sticky-session / high-throughput Nginx config
# Core idea: fully trust the backend, zero interference, maximum
# throughput, with the most robust session-affinity strategy.
# =================================================================

# --- Global performance settings ---
worker_processes auto;
worker_rlimit_nofile 102400;

# --- Event model tuning ---
events {
    worker_connections 102400;
    use epoll;
    multi_accept on;
}

# --- Core HTTP configuration ---
http {
    # --- Basic performance tuning ---
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 15s;
    client_body_timeout 10s;
    client_header_timeout 10s;
    server_tokens off;
    access_log off;


    # --- Upstream group (the AI worker) ---
    upstream qwen_backend {
        # Sticky sessions: hash on the Authorization header instead of the
        # potentially unstable client IP. All requests from one client carry
        # the same API key, so this guarantees session affinity and prevents
        # interleaved streaming output.
        # `consistent` minimizes session remapping when workers change.
        hash $http_authorization consistent;

        # Connection pooling to the workers for maximum connection reuse
        keepalive 128;

        # Trust strategy: no health checks or circuit breaking
        server qwen-local:8082;
    }


    # --- Main server (API gateway) ---
    server {
        listen 80;

        location / {
            # No request rate limiting
            proxy_pass http://qwen_backend;

            # --- Streaming (SSE) optimizations ---
            proxy_buffering off;
            proxy_cache off;

            # --- Protocol and header settings ---
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }
}
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ httpx
4
+ pydantic-settings
5
+ python-dotenv