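| # ================================================================= | |
| # Qwen Local API - Nginx configuration tuned for sticky sessions and throughput | |
| # Principles: trust the backend completely, intervene as little as possible, maximize throughput, and keep each client pinned to the same backend | |
| # ================================================================= | |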
| # --- Global process settings --- | |
| # Raise the open-file limit (RLIMIT_NOFILE) for worker processes. | |
| worker_rlimit_nofile 102400; | |
| # Let nginx pick the number of worker processes automatically. | |
| worker_processes auto; | |
| # --- Connection processing (event model) --- | |
| events { | |
| # Use the epoll connection processing method (Linux only). | |
| use epoll; | |
| # Have each worker accept all pending new connections at once. | |
| multi_accept on; | |
| # Maximum simultaneous connections per worker; this count includes | |
| # connections to the proxied backend, not only client connections. | |
| worker_connections 102400; | |
| } | |
| # --- HTTP core configuration --- | |
| # Reverse-proxies every request on port 80 to the qwen_backend upstream group, | |
| # with response buffering disabled so streamed output reaches clients immediately. | |
| http { | |
| # --- Baseline performance settings --- | |
| sendfile on; | |
| tcp_nopush on; | |
| tcp_nodelay on; | |
| # Client-side timeouts: idle keep-alive connections and slow request senders. | |
| keepalive_timeout 15s; | |
| client_body_timeout 10s; | |
| client_header_timeout 10s; | |
| # Do not expose the nginx version in responses and error pages. | |
| server_tokens off; | |
| # Access logging is disabled for throughput; re-enable it when debugging. | |
| access_log off; | |
| # --- Upstream server group (the AI workers) --- | |
| upstream qwen_backend { | |
| # Sticky sessions: choose the backend by hashing the Authorization request | |
| # header instead of the client IP, so requests carrying the same API key are | |
| # mapped to the same server. | |
| # NOTE: requests without an Authorization header presumably all share one | |
| # (empty) hash key and therefore one server - confirm if anonymous traffic matters. | |
| # NOTE: with the single server listed below the hash has no routing effect yet; | |
| # it only matters once more servers are added. | |
| # `consistent` selects consistent (ketama) hashing, so adding or removing a | |
| # server remaps only a small share of keys. | |
| hash $http_authorization consistent; | |
| # Keep up to 128 idle keep-alive connections to the backend per worker process. | |
| # Must come after the load-balancing method (hash) above. | |
| keepalive 128; | |
| # Trust policy: no explicit health-check or circuit-breaker parameters. | |
| # With a single server in the group nginx ignores max_fails/fail_timeout, | |
| # so this server is never marked unavailable. | |
| server qwen-local:8082; | |
| } | |
| # --- Main server (API gateway) --- | |
| server { | |
| listen 80; | |
| location / { | |
| # Performance policy: no request rate limiting is configured. | |
| proxy_pass http://qwen_backend; | |
| # --- Streaming settings --- | |
| # Pass the backend response to the client as soon as it is received. | |
| proxy_buffering off; | |
| proxy_cache off; | |
| # The default proxy_read_timeout is 60s between two successive reads from | |
| # the backend. A long generation that sends nothing for that long (for | |
| # example a non-streaming completion) would be cut off with a 504, so allow | |
| # a longer silence. Tune this to the longest expected generation time. | |
| proxy_read_timeout 600s; | |
| # --- Protocol and header settings --- | |
| # HTTP/1.1 plus an empty Connection header are required for upstream keepalive. | |
| proxy_http_version 1.1; | |
| proxy_set_header Connection ""; | |
| # Forward the original host, client address and scheme to the backend. | |
| proxy_set_header Host $host; | |
| proxy_set_header X-Real-IP $remote_addr; | |
| proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; | |
| proxy_set_header X-Forwarded-Proto $scheme; | |
| } | |
| } | |
| } | |