// zapi
//
// Sleeping
//
// File size: 65,742 Bytes

package main

import (
	"bufio"
	"bytes"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
	"os"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/google/uuid"
)

// Runtime configuration. Every variable here is assigned by initConfig from
// the environment variable of the same name (optionally pre-loaded from
// .env.local / .env); the defaults are listed in initConfig.
var (
	UPSTREAM_URL      string
	DEFAULT_KEY       string
	ZAI_TOKEN         string
	MODEL_NAME        string
	PORT              string
	DEBUG_MODE        bool
	DEFAULT_STREAM    bool
	DASHBOARD_ENABLED bool
	ENABLE_THINKING   bool
)

// RequestStats holds the aggregate request counters shown on the dashboard.
// The struct has no JSON tags, so getStatsData marshals it with the Go field
// names; AverageResponseTime is a time.Duration and is therefore encoded as an
// integer number of nanoseconds.
type RequestStats struct {
	TotalRequests       int64
	SuccessfulRequests  int64
	FailedRequests      int64
	LastRequestTime     time.Time
	AverageResponseTime time.Duration
}

// LiveRequest is one entry of the recent-request log served to the dashboard.
// addLiveRequest fills ID with the creation time in Unix nanoseconds and
// Duration with the request duration in milliseconds.
type LiveRequest struct {
	ID        string    `json:"id"`
	Timestamp time.Time `json:"timestamp"`
	Method    string    `json:"method"`
	Path      string    `json:"path"`
	Status    int       `json:"status"`
	Duration  int64     `json:"duration"`
	UserAgent string    `json:"user_agent"`
}

// Global state shared by the request handlers. stats is guarded by statsMutex
// and liveRequests by requestsMutex.
var (
	stats         RequestStats
	liveRequests  = []LiveRequest{} // initialized to an empty slice rather than nil
	statsMutex    sync.Mutex
	requestsMutex sync.Mutex
)

// Strategy for handling "thinking" content; consumed by transformThinkingContent.
const (
	THINK_TAGS_MODE = "strip" // strip: remove <details> tags; think: convert them to <think> tags; raw: leave as is
)

// System-wide tuning constants.
const (
	MAX_LIVE_REQUESTS      = 200        // maximum number of live request records kept
	AUTH_TOKEN_TIMEOUT     = 30         // timeout for fetching an anonymous token (seconds)
	UPSTREAM_TIMEOUT       = 200        // timeout for upstream API calls (seconds)
	TOKEN_DISPLAY_LENGTH   = 10         // number of leading characters shown when a token is displayed
	NANOSECONDS_TO_SECONDS = 1000000000 // nanoseconds per second
)

// Header values used to imitate the browser front end
// (updated 2025-09-30 to fix 426 errors).
const (
	X_FE_VERSION   = "prod-fe-1.0.94"                                                                                                  // updated: 1.0.70 -> 1.0.94
	BROWSER_UA     = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36" // updated: Chrome 139 -> 140
	SEC_CH_UA      = "\"Chromium\";v=\"140\", \"Not=A?Brand\";v=\"24\", \"Google Chrome\";v=\"140\""                                   // updated: Chrome 140
	SEC_CH_UA_MOB  = "?0"
	SEC_CH_UA_PLAT = "\"Windows\""
	ORIGIN_BASE    = "https://chat.z.ai"
)

// Switch for anonymous tokens.
// NOTE(review): presumably gates the use of getAnonymousToken; the consumer is
// not in the visible part of the file — confirm.
const ANON_TOKEN_ENABLED = true

// initConfig pre-loads the optional .env files and then fills every
// package-level configuration variable from the environment, falling back to
// the built-in defaults.
func initConfig() {
	// .env.local is read before .env; both are optional.
	for _, envFile := range []string{".env.local", ".env"} {
		loadEnvFile(envFile)
	}

	// Every boolean switch defaults to on and is enabled only by the exact
	// string "true".
	enabled := func(key string) bool {
		return getEnv(key, "true") == "true"
	}

	UPSTREAM_URL = getEnv("UPSTREAM_URL", "https://chat.z.ai/api/chat/completions")
	DEFAULT_KEY = getEnv("DEFAULT_KEY", "sk-your-key")
	ZAI_TOKEN = getEnv("ZAI_TOKEN", "")
	MODEL_NAME = getEnv("MODEL_NAME", "GLM-4.6")

	// Normalize the port so it always carries exactly the leading colon that
	// http.ListenAndServe expects ("7860" and ":7860" both become ":7860").
	PORT = ":" + strings.TrimPrefix(getEnv("PORT", "7860"), ":")

	DEBUG_MODE = enabled("DEBUG_MODE")
	DEFAULT_STREAM = enabled("DEFAULT_STREAM")
	DASHBOARD_ENABLED = enabled("DASHBOARD_ENABLED")
	ENABLE_THINKING = enabled("ENABLE_THINKING")
}

// recordRequestStats folds one finished request into the global stats.
//
// startTime is when the request began; the elapsed time up to this call is
// used as the request duration. status is the HTTP status code: 2xx counts as
// a success, everything else as a failure. path is accepted for the callers'
// convenience but is not used at present.
func recordRequestStats(startTime time.Time, path string, status int) {
	elapsed := time.Since(startTime)

	statsMutex.Lock()
	defer statsMutex.Unlock()

	stats.TotalRequests++
	stats.LastRequestTime = time.Now()

	if status >= 200 && status < 300 {
		stats.SuccessfulRequests++
	} else {
		stats.FailedRequests++
	}

	// Running mean. TotalRequests was just incremented, so it is at least 1
	// and the division is always safe; for the first request the previous
	// average is weighted by zero and the result is simply elapsed.
	count := time.Duration(stats.TotalRequests)
	stats.AverageResponseTime = (stats.AverageResponseTime*(count-1) + elapsed) / count
}

// addLiveRequest appends one record to the in-memory live request log and
// drops the oldest record once the log grows past MAX_LIVE_REQUESTS.
//
// duration is stored in whole milliseconds. The fifth parameter is ignored.
func addLiveRequest(method, path string, status int, duration time.Duration, _, userAgent string) {
	requestsMutex.Lock()
	defer requestsMutex.Unlock()

	liveRequests = append(liveRequests, LiveRequest{
		ID:        fmt.Sprintf("%d", time.Now().UnixNano()),
		Timestamp: time.Now(),
		Method:    method,
		Path:      path,
		Status:    status,
		Duration:  duration.Milliseconds(),
		UserAgent: userAgent,
	})

	// Still within the cap: nothing to evict.
	if len(liveRequests) <= MAX_LIVE_REQUESTS {
		return
	}
	// One record was added, so evicting the single oldest one is enough.
	liveRequests = liveRequests[1:]
}

// getLiveRequestsData returns the live request log encoded as a JSON array
// (used by the dashboard). The result is "[]" when there are no records.
func getLiveRequestsData() []byte {
	requestsMutex.Lock()
	defer requestsMutex.Unlock()

	// A nil slice would be encoded as null; keep it an empty slice so the
	// output is always an array.
	if liveRequests == nil {
		liveRequests = []LiveRequest{}
	}

	if encoded, err := json.Marshal(liveRequests); err == nil {
		return encoded
	}

	// Encoding failed: fall back to an empty array.
	fallback, _ := json.Marshal([]LiveRequest{})
	return fallback
}

// getStatsData returns the current request statistics encoded as JSON
// (used by the dashboard). The snapshot is taken under statsMutex.
func getStatsData() []byte {
	statsMutex.Lock()
	defer statsMutex.Unlock()

	// The marshal error is discarded, exactly as before; on failure the
	// returned slice is nil.
	encoded, _ := json.Marshal(stats)
	return encoded
}

// getEnv returns the value of the environment variable key, or defaultValue
// when the variable is unset or set to the empty string.
func getEnv(key, defaultValue string) string {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	return value
}

// loadEnvFile reads KEY=VALUE pairs from filename and exports them into the
// process environment.
//
// Rules:
//   - a file that cannot be opened is silently ignored, so the file is optional;
//   - blank lines and lines starting with '#' are skipped;
//   - the line is split on the first '=', and key and value are whitespace-trimmed;
//   - lines without '=' or with an empty key are skipped;
//   - a variable that already has a non-empty value is never overwritten, so
//     the real environment (and any file loaded earlier) takes precedence.
//
// Failures while setting a variable or while reading the file are logged
// instead of being dropped silently.
func loadEnvFile(filename string) {
	file, err := os.Open(filename)
	if err != nil {
		// Deliberately quiet: the env file is optional.
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		parts := strings.SplitN(line, "=", 2)
		if len(parts) != 2 {
			continue
		}
		key := strings.TrimSpace(parts[0])
		value := strings.TrimSpace(parts[1])

		// os.Setenv rejects an empty key, so skip lines such as "=value".
		if key == "" {
			continue
		}
		// Only fill in variables that are not set yet.
		if os.Getenv(key) != "" {
			continue
		}
		if err := os.Setenv(key, value); err != nil {
			log.Printf("加载 %s 时设置环境变量 %q 失败: %v", filename, key, err)
		}
	}

	// Scan stops on read errors and on over-long lines; report that instead
	// of leaving the configuration silently half-loaded.
	if err := scanner.Err(); err != nil {
		log.Printf("读取 %s 失败: %v", filename, err)
	}
}

// getClientIP returns the best-effort client address for r.
//
// Lookup order:
//  1. the first entry of the X-Forwarded-For header,
//  2. the X-Real-IP header,
//  3. r.RemoteAddr with the port removed.
//
// Both headers are supplied by the client or an intermediate proxy, so the
// result must not be used for access control.
func getClientIP(r *http.Request) string {
	// X-Forwarded-For is a comma-separated chain; the first entry is the
	// originating client. An empty first entry falls through to the next source.
	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
		if first := strings.TrimSpace(strings.Split(xff, ",")[0]); first != "" {
			return first
		}
	}

	if xri := r.Header.Get("X-Real-IP"); xri != "" {
		return xri
	}

	addr := r.RemoteAddr

	// Bracketed IPv6, e.g. "[2001:db8::1]:443": the host is the text between
	// the brackets. Splitting on the first ':' would return just "[".
	if strings.HasPrefix(addr, "[") {
		if end := strings.Index(addr, "]"); end > 0 {
			return addr[1:end]
		}
		return addr
	}

	// "host:port" with exactly one colon (IPv4 or hostname): drop the port.
	// Anything else (no colon, or a bare IPv6 literal) is returned unchanged.
	if strings.Count(addr, ":") == 1 {
		return addr[:strings.Index(addr, ":")]
	}
	return addr
}

// OpenAIRequest is the OpenAI-style chat completion request body accepted by
// this server. EnableThinking is a pointer so that an absent field (nil) can
// be told apart from an explicit false.
type OpenAIRequest struct {
	Model          string    `json:"model"`
	Messages       []Message `json:"messages"`
	Stream         bool      `json:"stream,omitempty"`
	Temperature    float64   `json:"temperature,omitempty"`
	MaxTokens      int       `json:"max_tokens,omitempty"`
	EnableThinking *bool     `json:"enable_thinking,omitempty"`
}

// Message is a single chat message: a role plus its text content.
type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// UpstreamRequest is the request body sent to the upstream chat API.
type UpstreamRequest struct {
	Stream          bool                   `json:"stream"`
	Model           string                 `json:"model"`
	Messages        []Message              `json:"messages"`
	Params          map[string]interface{} `json:"params"`
	Features        map[string]interface{} `json:"features"`
	BackgroundTasks map[string]bool        `json:"background_tasks,omitempty"`
	ChatID          string                 `json:"chat_id,omitempty"`
	ID              string                 `json:"id,omitempty"`
	MCPServers      []string               `json:"mcp_servers,omitempty"`
	// ModelItem is a struct value, so encoding/json always emits it; the
	// omitempty option has no effect here.
	ModelItem       struct {
		ID      string `json:"id"`
		Name    string `json:"name"`
		OwnedBy string `json:"owned_by"`
	} `json:"model_item,omitempty"`
	ToolServers []string          `json:"tool_servers,omitempty"`
	Variables   map[string]string `json:"variables,omitempty"`
}

// OpenAIResponse is the OpenAI-style chat completion response body.
//
// NOTE(review): Usage is a struct value, so encoding/json always emits it;
// omitempty has no effect on struct-typed fields.
type OpenAIResponse struct {
	ID      string   `json:"id"`
	Object  string   `json:"object"`
	Created int64    `json:"created"`
	Model   string   `json:"model"`
	Choices []Choice `json:"choices"`
	Usage   Usage    `json:"usage,omitempty"`
}

// Choice is one entry of OpenAIResponse.Choices. It carries both a Message
// and a Delta field.
//
// NOTE(review): Message and Delta are struct values, so encoding/json always
// emits both keys; omitempty has no effect on struct-typed fields. Every
// encoded choice therefore contains "message" as well as "delta".
type Choice struct {
	Index        int     `json:"index"`
	Message      Message `json:"message,omitempty"`
	Delta        Delta   `json:"delta,omitempty"`
	FinishReason string  `json:"finish_reason,omitempty"`
}

// Delta is the incremental role/content fragment of a Choice; empty fields
// are omitted from the JSON encoding.
type Delta struct {
	Role    string `json:"role,omitempty"`
	Content string `json:"content,omitempty"`
}

// Usage holds the token counts reported for a completion.
type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// UpstreamData is the JSON payload of one upstream SSE event.
//
// An error may be reported at any of three nesting levels, and each has its
// own field: top-level "error", "data.error", and "data.data.error" (Inner).
type UpstreamData struct {
	Type string `json:"type"`
	Data struct {
		DeltaContent string         `json:"delta_content"`
		Phase        string         `json:"phase"`
		Done         bool           `json:"done"`
		Usage        Usage          `json:"usage,omitempty"`
		Error        *UpstreamError `json:"error,omitempty"`
		Inner        *struct {
			Error *UpstreamError `json:"error,omitempty"`
		} `json:"data,omitempty"`
	} `json:"data"`
	Error *UpstreamError `json:"error,omitempty"`
}

// UpstreamError is the error object embedded in upstream responses: a
// human-readable detail string and a numeric code.
type UpstreamError struct {
	Detail string `json:"detail"`
	Code   int    `json:"code"`
}

// ModelsResponse is the response body of the model list endpoint.
type ModelsResponse struct {
	Object string  `json:"object"`
	Data   []Model `json:"data"`
}

// Model describes one entry of ModelsResponse.Data.
type Model struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

// debugLog writes a "[DEBUG] "-prefixed, Printf-style message to the standard
// logger. It is a no-op unless DEBUG_MODE is enabled.
func debugLog(format string, args ...interface{}) {
	if !DEBUG_MODE {
		return
	}
	log.Printf("[DEBUG] "+format, args...)
}

// Patterns used by transformThinkingContent. They are compiled once at
// package initialization rather than on every call; a *regexp.Regexp is safe
// for concurrent use.
var (
	// summaryBlockRe matches a whole <summary>…</summary> element, across lines.
	summaryBlockRe = regexp.MustCompile(`(?s)<summary>.*?</summary>`)
	// detailsOpenRe matches an opening <details …> tag with any attributes.
	detailsOpenRe = regexp.MustCompile(`<details[^>]*>`)
)

// transformThinkingContent cleans up the "thinking" markup in s.
//
// Steps, in order:
//  1. remove every <summary>…</summary> element;
//  2. remove leftover </thinking>, <Full> and </Full> tags;
//  3. handle <details> tags according to THINK_TAGS_MODE: "think" rewrites
//     them to <think>…</think>, "strip" removes them, any other mode leaves
//     them untouched;
//  4. remove the "> " quote prefix at the start of the text and after each newline.
//
// The result is returned with surrounding whitespace trimmed.
func transformThinkingContent(s string) string {
	s = summaryBlockRe.ReplaceAllString(s, "")

	// Leftover custom tags.
	s = strings.ReplaceAll(s, "</thinking>", "")
	s = strings.ReplaceAll(s, "<Full>", "")
	s = strings.ReplaceAll(s, "</Full>", "")
	s = strings.TrimSpace(s)

	switch THINK_TAGS_MODE {
	case "think":
		s = detailsOpenRe.ReplaceAllString(s, "<think>")
		s = strings.ReplaceAll(s, "</details>", "</think>")
	case "strip":
		s = detailsOpenRe.ReplaceAllString(s, "")
		s = strings.ReplaceAll(s, "</details>", "")
	}

	// The first line has no preceding newline, so its prefix is handled separately.
	s = strings.TrimPrefix(s, "> ")
	s = strings.ReplaceAll(s, "\n> ", "\n")
	return strings.TrimSpace(s)
}

// getUpstreamModelID maps a public model name to the model ID sent upstream.
// The GLM-4.5 family shares one upstream ID and the GLM-4.6 family another;
// an unknown name is logged through debugLog and treated as GLM-4.6.
func getUpstreamModelID(modelName string) string {
	const (
		glm45ID = "0727-360B-API"
		glm46ID = "GLM-4-6-API-V1"
	)

	switch modelName {
	case "GLM-4.5", "GLM-4.5-Thinking", "GLM-4.5-Search":
		return glm45ID
	case "GLM-4.6", "GLM-4.6-Thinking":
		return glm46ID
	}

	debugLog("未知模型名称: %s，使用GLM-4.6作为默认", modelName)
	return glm46ID
}

// getAnonymousToken requests a fresh anonymous token from
// ORIGIN_BASE + "/api/v1/auths/" (a new token per conversation avoids sharing
// memory between conversations).
//
// It returns the token on success. It returns an error when the request
// cannot be built or sent, when the response status is not 200, when the body
// cannot be read or decoded, or when the decoded token is empty.
func getAnonymousToken() (string, error) {
	tokenURL := ORIGIN_BASE + "/api/v1/auths/"
	debugLog("获取匿名token: %s", tokenURL)

	client := &http.Client{Timeout: AUTH_TOKEN_TIMEOUT * time.Second}
	req, err := http.NewRequest("GET", tokenURL, nil)
	if err != nil {
		debugLog("创建获取匿名token请求失败: %v", err)
		return "", err
	}

	// Present the request as coming from the browser front end.
	req.Header.Set("Accept", "*/*")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
	req.Header.Set("User-Agent", BROWSER_UA)
	req.Header.Set("Referer", ORIGIN_BASE+"/")

	resp, err := client.Do(req)
	if err != nil {
		debugLog("获取匿名token请求失败: %v", err)
		return "", err
	}
	defer resp.Body.Close()

	debugLog("获取匿名token响应状态: %d", resp.StatusCode)

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("获取匿名token失败: 状态码 %d", resp.StatusCode)
	}

	// Read the whole body before decoding. Decoding straight from resp.Body
	// consumes the stream, so a later read for the failure log would only see
	// whatever the decoder had not buffered yet (usually nothing).
	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		debugLog("读取匿名token响应失败: %v", err)
		return "", err
	}

	var body struct {
		Token string `json:"token"`
	}
	// A Decoder (rather than json.Unmarshal) keeps the previous tolerance for
	// trailing data after the first JSON value.
	if err := json.NewDecoder(bytes.NewReader(raw)).Decode(&body); err != nil {
		debugLog("解析匿名token响应失败: %v, 响应内容: %s", err, string(raw))
		return "", err
	}

	if body.Token == "" {
		return "", fmt.Errorf("anon token empty")
	}

	debugLog("成功获取匿名token")
	return body.Token, nil
}

// main loads the configuration, registers all HTTP routes on the default mux,
// logs the effective settings and serves until ListenAndServe fails.
func main() {
	initConfig()

	type route struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}
	register := func(routes []route) {
		for _, rt := range routes {
			http.HandleFunc(rt.pattern, rt.handler)
		}
	}

	// Always-on routes. "/" serves the dashboard page; the API endpoints are
	// exposed under three prefixes: /v1, /api/v1 and /hf/v1.
	register([]route{
		{"/docs", handleAPIDocs},
		{"/", handleDashboard},
		{"/v1/models", handleModels},
		{"/v1/chat/completions", handleChatCompletions},
		{"/api/v1/models", handleModels},
		{"/api/v1/chat/completions", handleChatCompletions},
		{"/hf/v1/models", handleModels},
		{"/hf/v1/chat/completions", handleChatCompletions},
	})

	// Dashboard routes, only when enabled.
	if DASHBOARD_ENABLED {
		register([]route{
			{"/dashboard", handleDashboard},
			{"/dashboard/stats", handleDashboardStats},
			{"/dashboard/requests", handleDashboardRequests},
		})
		log.Printf("Dashboard已启用，访问地址: http://localhost%s/dashboard", PORT)
	}

	// Only the first TOKEN_DISPLAY_LENGTH characters of the key are logged.
	shownKey := DEFAULT_KEY
	if len(shownKey) > TOKEN_DISPLAY_LENGTH {
		shownKey = shownKey[:TOKEN_DISPLAY_LENGTH] + "..."
	}

	log.Printf("OpenAI兼容API服务器启动在端口%s", PORT)
	log.Printf("模型: %s", MODEL_NAME)
	log.Printf("上游: %s", UPSTREAM_URL)
	log.Printf("API密钥: %s", shownKey)
	log.Printf("Debug模式: %v", DEBUG_MODE)
	log.Printf("默认流式响应: %v", DEFAULT_STREAM)
	log.Printf("Dashboard启用: %v", DASHBOARD_ENABLED)
	log.Printf("思考功能: %v", ENABLE_THINKING)
	log.Fatal(http.ListenAndServe(PORT, nil))
}

// handleDashboard serves the dashboard HTML page. main registers it for "/"
// and, when the dashboard is enabled, for "/dashboard".
//
// Only GET is accepted; any other method gets 405. The page is a single
// self-contained document whose script polls /dashboard/stats and
// /dashboard/requests every five seconds. MODEL_NAME is substituted into the
// table's model column when the page is rendered.
//
// The request fields shown in the table (method, path, user agent) originate
// from arbitrary clients, so the script HTML-escapes every value before it is
// placed into innerHTML or the title attribute.
func handleDashboard(w http.ResponseWriter, r *http.Request) {
	if r.Method != "GET" {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// The template goes through fmt.Sprintf: a literal percent sign must be
	// written as %% and the single %s receives MODEL_NAME.
	tmpl := fmt.Sprintf(`<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>API调用看板</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            padding: 20px;
        }
        h1 {
            color: #333;
            text-align: center;
            margin-bottom: 30px;
        }
        .stats-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 30px;
        }
        .stat-card {
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            text-align: center;
            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 24px;
            font-weight: bold;
            color: #007bff;
        }
        .stat-label {
            font-size: 14px;
            color: #6c757d;
            margin-top: 5px;
        }
        .requests-container {
            margin-top: 30px;
        }
        .requests-table {
            width: 100%%;
            border-collapse: collapse;
        }
        .requests-table th, .requests-table td {
            padding: 10px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        .requests-table th {
            background-color: #f8f9fa;
        }
        .status-success {
            color: #28a745;
        }
        .status-error {
            color: #dc3545;
        }
        .refresh-info {
            text-align: center;
            margin-top: 20px;
            color: #6c757d;
            font-size: 14px;
        }
        .pagination-container {
            display: flex;
            justify-content: center;
            align-items: center;
            margin-top: 20px;
            gap: 10px;
        }
        .pagination-container button {
            padding: 5px 10px;
            background-color: #007bff;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        .pagination-container button:disabled {
            background-color: #cccccc;
            cursor: not-allowed;
        }
        .pagination-container button:hover:not(:disabled) {
            background-color: #0056b3;
        }
        .chart-container {
            margin-top: 30px;
            height: 300px;
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>API调用看板</h1>

        <div class="stats-container">
            <div class="stat-card">
                <div class="stat-value" id="total-requests">0</div>
                <div class="stat-label">总请求数</div>
            </div>
            <div class="stat-card">
                <div class="stat-value" id="successful-requests">0</div>
                <div class="stat-label">成功请求</div>
            </div>
            <div class="stat-card">
                <div class="stat-value" id="failed-requests">0</div>
                <div class="stat-label">失败请求</div>
            </div>
            <div class="stat-card">
                <div class="stat-value" id="avg-response-time">0s</div>
                <div class="stat-label">平均响应时间</div>
            </div>
        </div>

        <div class="chart-container">
            <h2>请求统计图表</h2>
            <canvas id="requestsChart"></canvas>
        </div>

        <div class="requests-container">
            <h2>实时请求</h2>
            <table class="requests-table">
                <thead>
                    <tr>
                        <th>时间</th>
                        <th>模型</th>
                        <th>方法</th>
                        <th>路径</th>
                        <th>状态</th>
                        <th>耗时</th>
                        <th>User Agent</th>
                    </tr>
                </thead>
                <tbody id="requests-tbody">
                    <!-- 请求记录将通过JavaScript动态添加 -->
                </tbody>
            </table>
            <div class="pagination-container">
                <button id="prev-page" disabled>上一页</button>
                <span id="page-info">第 1 页，共 1 页</span>
                <button id="next-page" disabled>下一页</button>
            </div>
        </div>

        <div class="refresh-info">
            数据每5秒自动刷新一次
        </div>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <script>
        // 全局变量
        let allRequests = [];
        let currentPage = 1;
        const itemsPerPage = 10;
        let requestsChart = null;

        // Escape a value for safe insertion into HTML text or a quoted attribute.
        // Request fields (path, method, user agent) are controlled by the client.
        function escapeHtml(value) {
            return String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        // 更新统计数据
        function updateStats() {
            fetch('/dashboard/stats')
                .then(response => response.json())
                .then(data => {
                    document.getElementById('total-requests').textContent = data.TotalRequests;
                    document.getElementById('successful-requests').textContent = data.SuccessfulRequests;
                    document.getElementById('failed-requests').textContent = data.FailedRequests;
                    document.getElementById('avg-response-time').textContent = (data.AverageResponseTime / 1000000000).toFixed(2) + 's';
                })
                .catch(error => console.error('Error fetching stats:', error));
        }

        // 更新请求列表
        function updateRequests() {
            fetch('/dashboard/requests')
                .then(response => response.json())
                .then(data => {
                    // 检查数据是否为数组
                    if (!Array.isArray(data)) {
                        console.error('返回的数据不是数组:', data);
                        return;
                    }

                    // 保存所有请求数据
                    allRequests = data;

                    // 按时间倒序排列
                    allRequests.sort((a, b) => {
                        const timeA = new Date(a.timestamp);
                        const timeB = new Date(b.timestamp);
                        return timeB - timeA;
                    });

                    // 更新表格
                    updateTable();

                    // 更新图表
                    updateChart();

                    // 更新分页信息
                    updatePagination();
                })
                .catch(error => console.error('Error fetching requests:', error));
        }

        // 更新表格显示
        function updateTable() {
            const tbody = document.getElementById('requests-tbody');
            tbody.innerHTML = '';

            // 计算当前页的数据范围
            const startIndex = (currentPage - 1) * itemsPerPage;
            const endIndex = startIndex + itemsPerPage;
            const currentRequests = allRequests.slice(startIndex, endIndex);

            currentRequests.forEach(request => {
                const row = document.createElement('tr');

                // 格式化时间 - 检查时间戳是否有效
                let timeStr = "Invalid Date";
                if (request.timestamp) {
                    try {
                        const time = new Date(request.timestamp);
                        if (!isNaN(time.getTime())) {
                            timeStr = time.toLocaleTimeString();
                        }
                    } catch (e) {
                        console.error("时间格式化错误:", e);
                    }
                }

                // 状态样式
                const statusClass = request.status >= 200 && request.status < 300 ? 'status-success' : 'status-error';

                // 截断 User Agent，避免过长
                let userAgent = request.user_agent || "undefined";
                if (userAgent.length > 30) {
                    userAgent = userAgent.substring(0, 30) + "...";
                }

                row.innerHTML =
                   "<td>" + escapeHtml(timeStr) + "</td>" +
                   "<td>%s</td>" +
                   "<td>" + escapeHtml(request.method || "undefined") + "</td>" +
                   "<td>" + escapeHtml(request.path || "undefined") + "</td>" +
                   "<td class=\"" + statusClass + "\">" + escapeHtml(request.status || "undefined") + "</td>" +
                   "<td>" + ((request.duration / 1000).toFixed(2) || "undefined") + "s</td>" +
                   "<td title=\"" + escapeHtml(request.user_agent || "") + "\">" + escapeHtml(userAgent) + "</td>";

                tbody.appendChild(row);
            });
        }

        // 更新分页信息
        function updatePagination() {
            const totalPages = Math.ceil(allRequests.length / itemsPerPage);
            document.getElementById('page-info').textContent = "第 " + currentPage + " 页，共 " + totalPages + " 页";

            document.getElementById('prev-page').disabled = currentPage <= 1;
            document.getElementById('next-page').disabled = currentPage >= totalPages;
        }

        // 更新图表
        function updateChart() {
            const ctx = document.getElementById('requestsChart').getContext('2d');

            // 准备图表数据 - 最近20条请求的响应时间
            const chartData = allRequests.slice(0, 20).reverse();
            const labels = chartData.map(req => {
                const time = new Date(req.timestamp);
                return time.toLocaleTimeString();
            });
            const responseTimes = chartData.map(req => req.duration);

            // 如果图表已存在，先销毁
            if (requestsChart) {
                requestsChart.destroy();
            }

            // 创建新图表
            requestsChart = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: labels,
                    datasets: [{
                        label: '响应时间 (s)',
                        data: responseTimes.map(time => time / 1000),
                        borderColor: '#007bff',
                        backgroundColor: 'rgba(0, 123, 255, 0.1)',
                        tension: 0.1,
                        fill: true
                    }]
                },
                options: {
                    responsive: true,
                    maintainAspectRatio: false,
                    scales: {
                        y: {
                            beginAtZero: true,
                            title: {
                                display: true,
                                text: '响应时间 (s)'
                            }
                        },
                        x: {
                            title: {
                                display: true,
                                text: '时间'
                            }
                        }
                    },
                    plugins: {
                        title: {
                            display: true,
                            text: '最近20条请求的响应时间趋势 (s)'
                        }
                    }
                }
            });
        }

        // 分页按钮事件
        document.getElementById('prev-page').addEventListener('click', function() {
            if (currentPage > 1) {
                currentPage--;
                updateTable();
                updatePagination();
            }
        });

        document.getElementById('next-page').addEventListener('click', function() {
            const totalPages = Math.ceil(allRequests.length / itemsPerPage);
            if (currentPage < totalPages) {
                currentPage++;
                updateTable();
                updatePagination();
            }
        });

        // 初始加载
        updateStats();
        updateRequests();

        // 定时刷新
        setInterval(updateStats, 5000);
        setInterval(updateRequests, 5000);
    </script>
</body>
</html>`, MODEL_NAME)

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	fmt.Fprint(w, tmpl)
}

// handleDashboardStats writes the current request statistics as JSON.
func handleDashboardStats(w http.ResponseWriter, r *http.Request) {
	payload := getStatsData()
	w.Header().Set("Content-Type", "application/json")
	w.Write(payload)
}

// handleDashboardRequests writes the live request log as a JSON array.
func handleDashboardRequests(w http.ResponseWriter, r *http.Request) {
	payload := getLiveRequestsData()
	w.Header().Set("Content-Type", "application/json")
	w.Write(payload)
}

// API文档页面处理器
func handleAPIDocs(w http.ResponseWriter, r *http.Request) {
	// 只允许GET请求
	if r.Method != "GET" {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// 动态API文档HTML模板，使用当前配置的模型名称
	tmpl := fmt.Sprintf(`<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>ZtoApi 文档</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
            line-height: 1.6;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            padding: 30px;
        }
        h1 {
            color: #333;
            text-align: center;
            margin-bottom: 30px;
            border-bottom: 2px solid #007bff;
            padding-bottom: 10px;
        }
        h2 {
            color: #007bff;
            margin-top: 30px;
            margin-bottom: 15px;
        }
        h3 {
            color: #333;
            margin-top: 25px;
            margin-bottom: 10px;
        }
        .endpoint {
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            margin-bottom: 20px;
            border-left: 4px solid #007bff;
        }
        .method {
            display: inline-block;
            padding: 4px 8px;
            border-radius: 4px;
            color: white;
            font-weight: bold;
            margin-right: 10px;
            font-size: 14px;
        }
        .get { background-color: #28a745; }
        .post { background-color: #007bff; }
        .path {
            font-family: monospace;
            background-color: #e9ecef;
            padding: 2px 6px;
            border-radius: 3px;
            font-size: 16px;
        }
        .description {
            margin: 15px 0;
        }
        .parameters {
            margin: 15px 0;
        }
        table {
            width: 100%%;
            border-collapse: collapse;
            margin: 15px 0;
        }
        th, td {
            padding: 10px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }
        th {
            background-color: #f8f9fa;
            font-weight: bold;
        }
        .example {
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            margin: 15px 0;
            font-family: monospace;
            white-space: pre-wrap;
            overflow-x: auto;
        }
        .note {
            background-color: #fff3cd;
            border-left: 4px solid #ffc107;
            padding: 10px 15px;
            margin: 15px 0;
            border-radius: 0 4px 4px 0;
        }
        .response {
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            margin: 15px 0;
            font-family: monospace;
            white-space: pre-wrap;
            overflow-x: auto;
        }
        .tab {
            overflow: hidden;
            border: 1px solid #ccc;
            background-color: #f1f1f1;
            border-radius: 4px 4px 0 0;
        }
        .tab button {
            background-color: inherit;
            float: left;
            border: none;
            outline: none;
            cursor: pointer;
            padding: 14px 16px;
            transition: 0.3s;
            font-size: 16px;
        }
        .tab button:hover {
            background-color: #ddd;
        }
        .tab button.active {
            background-color: #ccc;
        }
        .tabcontent {
            display: none;
            padding: 6px 12px;
            border: 1px solid #ccc;
            border-top: none;
            border-radius: 0 0 4px 4px;
        }
        .toc {
            background-color: #f8f9fa;
            border-radius: 6px;
            padding: 15px;
            margin-bottom: 20px;
        }
        .toc ul {
            padding-left: 20px;
        }
        .toc li {
            margin: 5px 0;
        }
        .toc a {
            color: #007bff;
            text-decoration: none;
        }
        .toc a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>ZtoApi 文档</h1>

        <div class="toc">
            <h2>目录</h2>
            <ul>
                <li><a href="#overview">概述</a></li>
                <li><a href="#authentication">身份验证</a></li>
                <li><a href="#endpoints">API端点</a>
                    <ul>
                        <li><a href="#models">获取模型列表</a></li>
                        <li><a href="#chat-completions">聊天完成</a></li>
                    </ul>
                </li>
                <li><a href="#examples">使用示例</a></li>
                <li><a href="#error-handling">错误处理</a></li>
            </ul>
        </div>

        <section id="overview">
            <h2>概述</h2>
            <p>这是一个为Z.ai %s模型提供OpenAI兼容API接口的代理服务器。它允许你使用标准的OpenAI API格式与Z.ai的%s模型进行交互，支持流式和非流式响应。</p>
            <p><strong>基础URL:</strong> <code>http://localhost:7860/v1</code></p>
            <div class="note">
                <strong>注意:</strong> 默认端口为7860，可以通过环境变量PORT进行修改。
            </div>
        </section>

        <section id="authentication">
            <h2>身份验证</h2>
            <p>所有API请求都需要在请求头中包含有效的API密钥进行身份验证：</p>
            <div class="example">
Authorization: Bearer your-api-key</div>
            <p>默认的API密钥为 <code>sk-your-key</code>，可以通过环境变量 <code>DEFAULT_KEY</code> 进行修改。</p>
        </section>

        <section id="endpoints">
            <h2>API端点</h2>

            <div class="endpoint" id="models">
                <h3>获取模型列表</h3>
                <div>
                    <span class="method get">GET</span>
                    <span class="path">/v1/models</span>
                </div>
                <div class="description">
                    <p>获取可用模型列表。</p>
                </div>
                <div class="parameters">
                    <h4>请求参数</h4>
                    <p>无</p>
                </div>
                <div class="response">
{
  "object": "list",
  "data": [
    {
      "id": "%s",
      "object": "model",
      "created": 1756788845,
      "owned_by": "z.ai"
    }
  ]
}</div>
            </div>

            <div class="endpoint" id="chat-completions">
                <h3>聊天完成</h3>
                <div>
                    <span class="method post">POST</span>
                    <span class="path">/v1/chat/completions</span>
                </div>
                <div class="description">
                    <p>基于消息列表生成模型响应。支持流式和非流式两种模式。</p>
                </div>
                <div class="parameters">
                    <h4>请求参数</h4>
                    <table>
                        <thead>
                            <tr>
                                <th>参数名</th>
                                <th>类型</th>
                                <th>必需</th>
                                <th>说明</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td>model</td>
                                <td>string</td>
                                <td>是</td>
                                <td>要使用的模型ID，例如 "%s"</td>
                            </tr>
                            <tr>
                                <td>messages</td>
                                <td>array</td>
                                <td>是</td>
                                <td>消息列表，包含角色和内容</td>
                            </tr>
                            <tr>
                                <td>stream</td>
                                <td>boolean</td>
                                <td>否</td>
                                <td>是否使用流式响应，默认为true</td>
                            </tr>
                            <tr>
                                <td>temperature</td>
                                <td>number</td>
                                <td>否</td>
                                <td>采样温度，控制随机性</td>
                            </tr>
                            <tr>
                               <td>max_tokens</td>
                               <td>integer</td>
                               <td>否</td>
                               <td>生成的最大令牌数</td>
                           </tr>
                           <tr>
                               <td>enable_thinking</td>
                               <td>boolean</td>
                               <td>否</td>
                               <td>是否启用思考功能，默认使用环境变量 ENABLE_THINKING 的值</td>
                           </tr>
                        </tbody>
                    </table>
                </div>
                <div class="parameters">
                    <h4>消息格式</h4>
                    <table>
                        <thead>
                            <tr>
                                <th>字段</th>
                                <th>类型</th>
                                <th>说明</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td>role</td>
                                <td>string</td>
                                <td>消息角色，可选值：system、user、assistant</td>
                            </tr>
                            <tr>
                                <td>content</td>
                                <td>string</td>
                                <td>消息内容</td>
                            </tr>
                        </tbody>
                    </table>
                </div>
            </div>
        </section>

        <section id="examples">
            <h2>使用示例</h2>

            <div class="tab">
                <button class="tablinks active" onclick="openTab(event, 'python-tab')">Python</button>
                <button class="tablinks" onclick="openTab(event, 'curl-tab')">cURL</button>
                <button class="tablinks" onclick="openTab(event, 'javascript-tab')">JavaScript</button>
            </div>

            <div id="python-tab" class="tabcontent" style="display: block;">
                <h3>Python示例</h3>
                <div class="example">
import openai

# 配置客户端
client = openai.OpenAI(
    api_key="your-api-key",  # 对应 DEFAULT_KEY
    base_url="http://localhost:7860/v1"
)

# 非流式请求
response = client.chat.completions.create(
    model="%s",
    messages=[{"role": "user", "content": "你好，请介绍一下自己"}]
)

print(response.choices[0].message.content)

# 流式请求
response = client.chat.completions.create(
    model="%s",
    messages=[{"role": "user", "content": "请写一首关于春天的诗"}],
    stream=True
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")</div>
            </div>

            <div id="curl-tab" class="tabcontent">
                <h3>cURL示例</h3>
                <div class="example">
# 非流式请求
curl -X POST http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "GLM-4.6",
    "messages": [{"role": "user", "content": "你好"}],
    "stream": false
  }'

# 流式请求
curl -X POST http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "GLM-4.6",
    "messages": [{"role": "user", "content": "你好"}],
    "stream": true
  }'</div>

# 启用思考功能的请求
curl -X POST http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "GLM-4.6",
    "messages": [{"role": "user", "content": "请分析一下这个问题"}],
    "enable_thinking": true
  }'
            </div>

            <div id="javascript-tab" class="tabcontent">
                <h3>JavaScript示例</h3>
                <div class="example">
const fetch = require('node-fetch');

async function chatWithGLM(message, stream = false) {
  const response = await fetch('http://localhost:7860/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': 'Bearer your-api-key'
    },
    body: JSON.stringify({
      model: '%s',
      messages: [{ role: 'user', content: message }],
      stream: stream
    })
  });

  if (stream) {
    // 处理流式响应
    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const chunk = decoder.decode(value);
      const lines = chunk.split('\n');

      for (const line of lines) {
        if (line.startsWith('data: ')) {
          const data = line.slice(6);
          if (data === '[DONE]') {
            console.log('\n流式响应完成');
            return;
          }

          try {
            const parsed = JSON.parse(data);
            const content = parsed.choices[0]?.delta?.content;
            if (content) {
              process.stdout.write(content);
            }
          } catch (e) {
            // 忽略解析错误
          }
        }
      }
    }
  } else {
    // 处理非流式响应
    const data = await response.json();
    console.log(data.choices[0].message.content);
  }
}

// 使用示例
chatWithGLM('你好，请介绍一下JavaScript', false);</div>
            </div>
        </section>

        <section id="error-handling">
            <h2>错误处理</h2>
            <p>API使用标准HTTP状态码来表示请求的成功或失败：</p>
            <table>
                <thead>
                    <tr>
                        <th>状态码</th>
                        <th>说明</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>200 OK</td>
                        <td>请求成功</td>
                    </tr>
                    <tr>
                        <td>400 Bad Request</td>
                        <td>请求格式错误或参数无效</td>
                    </tr>
                    <tr>
                        <td>401 Unauthorized</td>
                        <td>API密钥无效或缺失</td>
                    </tr>
                    <tr>
                        <td>502 Bad Gateway</td>
                        <td>上游服务错误</td>
                    </tr>
                </tbody>
            </table>
            <div class="note">
                <strong>注意:</strong> 在调试模式下，服务器会输出详细的日志信息，可以通过设置环境变量 DEBUG_MODE=true 来启用。
            </div>
        </section>
    </div>

    <script>
        function openTab(evt, tabName) {
            var i, tabcontent, tablinks;
            tabcontent = document.getElementsByClassName("tabcontent");
            for (i = 0; i < tabcontent.length; i++) {
                tabcontent[i].style.display = "none";
            }
            tablinks = document.getElementsByClassName("tablinks");
            for (i = 0; i < tablinks.length; i++) {
                tablinks[i].className = tablinks[i].className.replace(" active", "");
            }
            document.getElementById(tabName).style.display = "block";
            evt.currentTarget.className += " active";
        }
    </script>
</body>
</html>`, MODEL_NAME, MODEL_NAME, MODEL_NAME, MODEL_NAME, MODEL_NAME, MODEL_NAME, MODEL_NAME)

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	fmt.Fprint(w, tmpl)
}

// handleOptions is the catch-all handler for CORS preflight traffic.
//
// CORS headers are always attached. An OPTIONS request is acknowledged with
// 200 OK; every other method is answered with 404 Not Found.
func handleOptions(w http.ResponseWriter, r *http.Request) {
	setCORSHeaders(w)

	status := http.StatusNotFound
	if r.Method == http.MethodOptions {
		status = http.StatusOK
	}
	w.WriteHeader(status)
}

// setCORSHeaders attaches the permissive CORS response headers used by every
// endpoint: any origin, the common HTTP methods, and the Content-Type and
// Authorization request headers.
//
// NOTE(review): browsers refuse credentialed cross-origin requests when
// Access-Control-Allow-Origin is the "*" wildcard, so the Allow-Credentials
// header is likely ineffective in combination with it — confirm the intent.
func setCORSHeaders(w http.ResponseWriter) {
	corsHeaders := [...][2]string{
		{"Access-Control-Allow-Origin", "*"},
		{"Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS"},
		{"Access-Control-Allow-Headers", "Content-Type, Authorization"},
		{"Access-Control-Allow-Credentials", "true"},
	}

	hdr := w.Header()
	for _, kv := range corsHeaders {
		hdr.Set(kv[0], kv[1])
	}
}

// handleModels serves the model listing endpoint.
//
// OPTIONS requests are acknowledged with 200 OK and nothing else. Any other
// request receives a JSON list containing the single "GLM-4.6" model entry,
// after which the request is reported through addLiveRequest.
func handleModels(w http.ResponseWriter, r *http.Request) {
	begin := time.Now()

	setCORSHeaders(w)
	if r.Method == http.MethodOptions {
		w.WriteHeader(http.StatusOK)
		return
	}

	onlyModel := Model{
		ID:      "GLM-4.6",
		Object:  "model",
		Created: time.Now().Unix(),
		OwnedBy: "z.ai",
	}
	listing := ModelsResponse{
		Object: "list",
		Data:   []Model{onlyModel},
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(listing)

	// Report the request to the live-request log once the body has been written.
	elapsed := time.Since(begin)
	addLiveRequest(r.Method, r.URL.Path, http.StatusOK, elapsed, "", r.UserAgent())
}

// summarizeMessages condenses several chat messages into one system message
// that can be sent upstream as conversation context.
//
// Each message contributes a role label (system, user or assistant; other
// roles get no label) followed by at most 200 characters of its content.
// Truncated content is suffixed with "...". The returned message has role
// "system" and ends with an instruction to continue the conversation.
func summarizeMessages(messages []Message) Message {
	// Maximum number of characters (runes, not bytes) kept per message.
	const maxSummaryChars = 200

	var summaryBuilder strings.Builder
	summaryBuilder.WriteString("以下是前15条消息的总结：\n\n")

	for _, msg := range messages {
		switch msg.Role {
		case "system":
			summaryBuilder.WriteString("系统提示：")
		case "user":
			summaryBuilder.WriteString("用户：")
		case "assistant":
			summaryBuilder.WriteString("助手：")
		}

		content := msg.Content
		// A string with at most maxSummaryChars bytes cannot exceed the rune
		// limit, so the rune walk is only needed for longer content.
		if len(content) > maxSummaryChars {
			seen := 0
			// Ranging over a string yields the byte offset of every rune start,
			// so cutting at offset never splits a multi-byte UTF-8 character.
			for offset := range content {
				if seen == maxSummaryChars {
					content = content[:offset] + "..."
					break
				}
				seen++
			}
		}
		summaryBuilder.WriteString(content)
		summaryBuilder.WriteString("\n\n")
	}

	// The summary is delivered as a system message.
	return Message{
		Role:    "system",
		Content: summaryBuilder.String() + "请基于以上对话历史，继续回复用户的最新问题。",
	}
}

// handleChatCompletions serves the OpenAI-compatible chat completions endpoint.
//
// It acknowledges CORS preflight requests, decodes the JSON body into an
// OpenAIRequest, condenses conversations longer than 15 messages, builds the
// upstream request and then delegates to the streaming or non-streaming
// responder depending on req.Stream. Unreadable or malformed bodies are
// answered with 400 Bad Request and reported through recordRequestStats and
// addLiveRequest.
func handleChatCompletions(w http.ResponseWriter, r *http.Request) {
	startTime := time.Now()
	path := r.URL.Path
	clientIP := getClientIP(r)
	userAgent := r.UserAgent()

	setCORSHeaders(w)
	if r.Method == "OPTIONS" {
		w.WriteHeader(http.StatusOK)
		return
	}

	debugLog("收到chat completions请求")

	// reject answers the client with an error status and performs the same
	// bookkeeping for every early failure path.
	reject := func(status int, msg string) {
		http.Error(w, msg, status)
		duration := time.Since(startTime)
		recordRequestStats(startTime, path, status)
		addLiveRequest(r.Method, path, status, duration, "", userAgent)
	}

	// API key validation is currently disabled; the previous implementation is
	// kept below for reference.
	//
	// authHeader := r.Header.Get("Authorization")
	// if !strings.HasPrefix(authHeader, "Bearer ") {
	// 	debugLog("缺少或无效的Authorization头")
	// 	http.Error(w, "Missing or invalid Authorization header", http.StatusUnauthorized)
	// 	// record request stats
	// 	duration := time.Since(startTime)
	// 	recordRequestStats(startTime, path, http.StatusUnauthorized)
	// 	addLiveRequest(r.Method, path, http.StatusUnauthorized, duration, "", userAgent)
	// 	return
	// }
	//
	// apiKey := strings.TrimPrefix(authHeader, "Bearer ")
	// if apiKey != DEFAULT_KEY {
	// 	debugLog("无效的API key: %s", apiKey)
	// 	http.Error(w, "Invalid API key", http.StatusUnauthorized)
	// 	// record request stats
	// 	duration := time.Since(startTime)
	// 	recordRequestStats(startTime, path, http.StatusUnauthorized)
	// 	addLiveRequest(r.Method, path, http.StatusUnauthorized, duration, "", userAgent)
	// 	return
	// }
	//
	// debugLog("API key验证通过")

	// Read the request body.
	body, err := io.ReadAll(r.Body)
	if err != nil {
		debugLog("读取请求体失败: %v", err)
		reject(http.StatusBadRequest, "Failed to read request body")
		return
	}

	// Decode the request.
	var req OpenAIRequest
	if err := json.Unmarshal(body, &req); err != nil {
		debugLog("JSON解析失败: %v", err)
		reject(http.StatusBadRequest, "Invalid JSON")
		return
	}

	// Fall back to DEFAULT_STREAM when the client did not send a top-level
	// "stream" field. A pointer distinguishes "absent" from an explicit false;
	// searching the raw body for the text "stream" would also match string
	// values and nested keys.
	var streamProbe struct {
		Stream *bool `json:"stream"`
	}
	if err := json.Unmarshal(body, &streamProbe); err != nil || streamProbe.Stream == nil {
		req.Stream = DEFAULT_STREAM
		debugLog("客户端未指定stream参数，使用默认值: %v", DEFAULT_STREAM)
	}

	// Conversations longer than summaryThreshold messages are condensed: the
	// first summaryThreshold messages become one summary message and at most
	// recentKept of the newest messages are forwarded verbatim.
	//
	// NOTE(review): messages located between the summarized prefix and the
	// retained tail are not forwarded at all — confirm this is intended.
	const (
		summaryThreshold = 15
		recentKept       = 3
	)
	processedMessages := req.Messages
	if total := len(req.Messages); total > summaryThreshold {
		debugLog("对话次数超过15次，自动进行总结")
		summaryMessage := summarizeMessages(req.Messages[:summaryThreshold])

		// Never reach back into the part that was already summarized.
		startIndex := total - recentKept
		if startIndex < summaryThreshold {
			startIndex = summaryThreshold
		}
		recentMessages := req.Messages[startIndex:]

		processedMessages = append([]Message{summaryMessage}, recentMessages...)
		debugLog("总结完成，消息数从%d减少到%d", total, len(processedMessages))
	}

	// debugLog("请求解析成功 - 模型: %s, 流式: %v, 消息数: %d", req.Model, req.Stream, len(processedMessages))

	// Generate the identifiers for this conversation.
	chatID := fmt.Sprintf("%d-%d", time.Now().UnixNano(), time.Now().Unix())
	msgID := fmt.Sprintf("%d", time.Now().UnixNano())

	// Thinking is enabled when the requested model name contains "thinking".
	enableThinking := strings.Contains(strings.ToLower(req.Model), "thinking")
	debugLog("根据模型名称启用思考功能: %v (模型: %s)", enableThinking, req.Model)

	// Build the upstream request.
	upstreamModelID := getUpstreamModelID(req.Model)
	upstreamReq := UpstreamRequest{
		Stream:   true, // the upstream is always consumed as a stream
		ChatID:   chatID,
		ID:       msgID,
		Model:    upstreamModelID,   // derived from the model named in the request
		Messages: processedMessages, // possibly condensed message list
		Params:   map[string]interface{}{},
		Features: map[string]interface{}{
			"enable_thinking": enableThinking,
		},
		BackgroundTasks: map[string]bool{
			"title_generation": false,
			"tags_generation":  false,
		},
		MCPServers: []string{},
		ModelItem: struct {
			ID      string `json:"id"`
			Name    string `json:"name"`
			OwnedBy string `json:"owned_by"`
		}{ID: upstreamModelID, Name: req.Model, OwnedBy: "openai"},
		ToolServers: []string{},
		Variables: map[string]string{
			"{{USER_NAME}}":        "User",
			"{{USER_LOCATION}}":    "Unknown",
			"{{CURRENT_DATETIME}}": time.Now().Format("2006-01-02 15:04:05"),
		},
	}

	// Pick the token for this conversation: the configured ZAI_TOKEN wins,
	// otherwise an anonymous token is requested when that is enabled.
	authToken := ZAI_TOKEN
	switch {
	case authToken != "":
		// Only a short prefix of the token is written to the log.
		shown := authToken
		if len(shown) > TOKEN_DISPLAY_LENGTH {
			shown = shown[:TOKEN_DISPLAY_LENGTH]
		}
		debugLog("使用配置的ZAI_TOKEN: %s...", shown)
	case ANON_TOKEN_ENABLED:
		if t, err := getAnonymousToken(); err == nil {
			authToken = t
		} else {
			// The request continues without a token.
			debugLog("匿名token获取失败: %v", err)
		}
	}

	// Call the upstream API.
	if req.Stream {
		handleStreamResponseWithIDs(w, upstreamReq, chatID, authToken, startTime, path, clientIP, userAgent)
	} else {
		handleNonStreamResponseWithIDs(w, upstreamReq, chatID, authToken, startTime, path, clientIP, userAgent)
	}
}

// extractUserIDFromToken returns the user id stored in the payload segment of
// a JWT-style token ("header.payload.signature").
//
// The payload is base64-decoded and parsed as a JSON object; the first
// non-null value among the keys "id", "user_id", "uid" and "sub" is returned
// in its textual form. "guest" is returned when the token has no payload
// segment, the payload cannot be decoded or parsed, or none of the keys is
// present.
func extractUserIDFromToken(token string) string {
	parts := strings.Split(token, ".")
	if len(parts) < 2 {
		return "guest"
	}

	// Normalize the payload to the unpadded URL-safe base64 alphabet:
	// standard-alphabet characters are translated, while padding and any other
	// character are dropped. This makes a single decoder sufficient for both
	// alphabets and for padded as well as unpadded input.
	payloadRaw := strings.Map(func(r rune) rune {
		switch {
		case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-', r == '_':
			return r
		case r == '+':
			return '-'
		case r == '/':
			return '_'
		}
		return -1
	}, parts[1])

	payloadBytes, err := base64.RawURLEncoding.DecodeString(payloadRaw)
	if err != nil {
		debugLog("JWT payload解码失败: %v", err)
		return "guest"
	}

	// decodeClaims parses raw as exactly one JSON value. Numbers are kept as
	// json.Number so that numeric ids keep their literal digits instead of
	// being printed as floats in exponent notation.
	decodeClaims := func(raw []byte) (map[string]interface{}, error) {
		dec := json.NewDecoder(bytes.NewReader(raw))
		dec.UseNumber()
		var claims map[string]interface{}
		if err := dec.Decode(&claims); err != nil {
			return nil, err
		}
		// Anything other than end of input after the value is an error, as it
		// would be for json.Unmarshal.
		if _, err := dec.Token(); err != io.EOF {
			return nil, fmt.Errorf("unexpected data after JWT payload")
		}
		return claims, nil
	}

	payload, err := decodeClaims(payloadBytes)
	if err != nil {
		// Retry once with control bytes removed (tab, CR and LF are kept, as
		// are all bytes >= 128, which may belong to UTF-8 sequences).
		cleanBytes := make([]byte, 0, len(payloadBytes))
		for _, b := range payloadBytes {
			if (b >= 32 && b != 127) || b == '\n' || b == '\r' || b == '\t' {
				cleanBytes = append(cleanBytes, b)
			}
		}
		payload, err = decodeClaims(cleanBytes)
		if err != nil {
			debugLog("JWT payload解析失败: %v", err)
			return "guest"
		}
	}

	// Try the known user id keys in priority order.
	for _, key := range []string{"id", "user_id", "uid", "sub"} {
		if val, exists := payload[key]; exists && val != nil {
			return fmt.Sprintf("%v", val)
		}
	}

	return "guest"
}

// generateSignature computes the two-layer HMAC-SHA256 request signature.
//
// Layer 1 derives a key: the hex digest of HMAC(secret, windowIndex), where
// windowIndex is timestampMs divided by the length of a five-minute window in
// milliseconds. Layer 2 signs the canonical string
// "requestId,<id>,timestamp,<ms>,user_id,<uid>|<base64(message)>|<ms>" with
// the ASCII bytes of that hex digest. The result is the lowercase hex digest
// of layer 2.
func generateSignature(messageText string, requestID string, timestampMs int64, userID string) string {
	const (
		signingSecret = "junjie"      // default signing secret
		windowMillis  = 5 * 60 * 1000 // five-minute window in milliseconds
	)

	// hexMAC returns the hex-encoded HMAC-SHA256 of payload under key.
	hexMAC := func(key []byte, payload string) string {
		mac := hmac.New(sha256.New, key)
		mac.Write([]byte(payload))
		return fmt.Sprintf("%x", mac.Sum(nil))
	}

	// Layer 1: derive the per-window key.
	derivedKey := hexMAC([]byte(signingSecret), fmt.Sprintf("%d", timestampMs/windowMillis))

	// Layer 2: sign the canonical request description.
	canonical := fmt.Sprintf(
		"requestId,%s,timestamp,%d,user_id,%s|%s|%d",
		requestID,
		timestampMs,
		userID,
		base64.StdEncoding.EncodeToString([]byte(messageText)),
		timestampMs,
	)
	return hexMAC([]byte(derivedKey), canonical)
}

// callUpstreamWithHeaders sends upstreamReq as a signed JSON POST request to
// UPSTREAM_URL and returns the raw HTTP response.
//
// The request carries browser-like headers, the bearer token, and a signature
// computed over the content of the most recent "user" message. refererChatID
// is used to build the Referer header and the current_url/pathname query
// parameters. On success the caller owns resp.Body and must close it; a
// non-200 status is not treated as an error here.
func callUpstreamWithHeaders(upstreamReq UpstreamRequest, refererChatID string, authToken string) (*http.Response, error) {
	reqBody, err := json.Marshal(upstreamReq)
	if err != nil {
		debugLog("上游请求序列化失败: %v", err)
		return nil, err
	}

	timestampMs := time.Now().UnixMilli()
	requestID := uuid.New().String()
	userID := extractUserIDFromToken(authToken)

	// The signature covers the most recent user message; it stays empty when
	// the conversation contains none.
	lastUserMessage := ""
	for i := len(upstreamReq.Messages) - 1; i >= 0; i-- {
		if upstreamReq.Messages[i].Role == "user" {
			lastUserMessage = upstreamReq.Messages[i].Content
			break
		}
	}
	signature := generateSignature(lastUserMessage, requestID, timestampMs, userID)

	// Build the query string.
	timestamp := fmt.Sprintf("%d", timestampMs)
	chatPath := "/c/" + refererChatID
	queryParams := url.Values{}
	queryParams.Set("timestamp", timestamp)
	queryParams.Set("requestId", requestID)
	queryParams.Set("user_id", userID)
	queryParams.Set("token", authToken)
	queryParams.Set("current_url", ORIGIN_BASE+chatPath)
	queryParams.Set("pathname", chatPath)
	queryParams.Set("signature_timestamp", timestamp)

	fullURL := fmt.Sprintf("%s?%s", UPSTREAM_URL, queryParams.Encode())

	if DEBUG_MODE {
		// Log the URL with the token masked so the credential never reaches
		// the log output.
		loggedParams := url.Values{}
		for key, vals := range queryParams {
			loggedParams[key] = vals
		}
		loggedParams.Set("token", "***")
		debugLog("调用上游API: %s", fmt.Sprintf("%s?%s", UPSTREAM_URL, loggedParams.Encode()))
	}
	// debugLog("上游请求体: %s", string(reqBody))

	req, err := http.NewRequest("POST", fullURL, bytes.NewBuffer(reqBody))
	if err != nil {
		debugLog("创建HTTP请求失败: %v", err)
		return nil, err
	}

	// Request headers.
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/json, text/event-stream")
	req.Header.Set("User-Agent", BROWSER_UA)
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
	req.Header.Set("Authorization", "Bearer "+authToken)
	req.Header.Set("X-Signature", signature)
	// Use the shared front-end version constant rather than a stale literal.
	req.Header.Set("X-FE-Version", X_FE_VERSION)
	req.Header.Set("Origin", ORIGIN_BASE)
	req.Header.Set("Referer", ORIGIN_BASE+chatPath)

	client := &http.Client{Timeout: UPSTREAM_TIMEOUT * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		debugLog("上游请求失败: %v", err)
		return nil, err
	}

	debugLog("上游响应状态: %d  ---  %s", resp.StatusCode, resp.Status)
	return resp, nil
}

// handleStreamResponseWithIDs forwards an upstream SSE stream to the client as
// OpenAI-style "chat.completion.chunk" events.
//
// The first event carries the assistant role, each following event carries one
// content delta (thinking-phase deltas pass through transformThinkingContent),
// and the stream always ends with a finish chunk followed by "data: [DONE]" —
// whether upstream signalled completion, reported an error, or simply closed
// the connection. Failures before streaming starts are answered with an HTTP
// error status. Every outcome is reported through recordRequestStats and
// addLiveRequest.
func handleStreamResponseWithIDs(w http.ResponseWriter, upstreamReq UpstreamRequest, chatID string, authToken string, startTime time.Time, path string, clientIP, userAgent string) {
	// debugLog("开始处理流式响应 (chat_id=%s)", chatID)

	// fail answers with an HTTP error and performs the failure bookkeeping.
	fail := func(status int, msg string) {
		http.Error(w, msg, status)
		duration := time.Since(startTime)
		recordRequestStats(startTime, path, status)
		addLiveRequest("POST", path, status, duration, "", userAgent)
	}

	resp, err := callUpstreamWithHeaders(upstreamReq, chatID, authToken)
	if err != nil {
		debugLog("调用上游失败: %v", err)
		fail(http.StatusBadGateway, "Failed to call upstream")
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		debugLog("上游返回错误状态: %d", resp.StatusCode)
		// The error body is only read when it is going to be logged.
		if DEBUG_MODE {
			body, _ := io.ReadAll(resp.Body)
			debugLog("上游错误响应: %s", string(body))
		}
		fail(http.StatusBadGateway, "Upstream error")
		return
	}

	// Streaming needs a writer that can flush; check before any SSE header is
	// set so the error response is a plain HTTP error.
	flusher, ok := w.(http.Flusher)
	if !ok {
		fail(http.StatusInternalServerError, "Streaming unsupported")
		return
	}

	// Strategy 2: always surface thinking + answer.

	// SSE response headers.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")

	// All chunks of one completion share the same id and creation time.
	completionID := fmt.Sprintf("chatcmpl-%d", time.Now().Unix())
	created := time.Now().Unix()

	// emit writes one chunk holding the given choice and flushes it.
	emit := func(choice Choice) {
		writeSSEChunk(w, OpenAIResponse{
			ID:      completionID,
			Object:  "chat.completion.chunk",
			Created: created,
			Model:   MODEL_NAME,
			Choices: []Choice{choice},
		})
		flusher.Flush()
	}

	// First chunk: announces the assistant role.
	emit(Choice{Index: 0, Delta: Delta{Role: "assistant"}})

	// Read the upstream SSE stream line by line. The buffer limit is raised
	// above bufio.Scanner's 64 KiB default so that a long data line does not
	// abort the stream.
	debugLog("开始读取上游SSE流")
	scanner := bufio.NewScanner(resp.Body)
	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20)

	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue
		}

		dataStr := strings.TrimPrefix(line, "data: ")
		if dataStr == "" {
			continue
		}

		// debugLog("收到SSE数据: %s", dataStr)

		var upstreamData UpstreamData
		if err := json.Unmarshal([]byte(dataStr), &upstreamData); err != nil {
			debugLog("SSE数据解析失败: %v", err)
			continue
		}

		// An error may be reported at the top level, in data, or in data's
		// inner object; the first one found ends the stream.
		errObj := upstreamData.Error
		if errObj == nil {
			errObj = upstreamData.Data.Error
		}
		if errObj == nil && upstreamData.Data.Inner != nil {
			errObj = upstreamData.Data.Inner.Error
		}
		if errObj != nil {
			debugLog("上游错误: code=%d, detail=%s", errObj.Code, errObj.Detail)
			break
		}

		// debugLog("解析成功 - 类型: %s, 阶段: %s, 内容长度: %d, 完成: %v",
		// 	upstreamData.Type, upstreamData.Data.Phase, len(upstreamData.Data.DeltaContent), upstreamData.Data.Done)

		// Strategy 2: always surface thinking + answer.
		if out := upstreamData.Data.DeltaContent; out != "" {
			if upstreamData.Data.Phase == "thinking" {
				out = transformThinkingContent(out)
			}
			// The transformation may leave nothing to send.
			if out != "" {
				// debugLog("发送内容(%s): %s", upstreamData.Data.Phase, out)
				emit(Choice{Index: 0, Delta: Delta{Content: out}})
			}
		}

		// Stop at the upstream completion signal.
		if upstreamData.Data.Done || upstreamData.Data.Phase == "done" {
			// debugLog("检测到流结束信号")
			break
		}
	}

	if err := scanner.Err(); err != nil {
		debugLog("扫描器错误: %v", err)
	}

	// Always terminate the client stream, including when upstream closed the
	// connection without a completion signal.
	emit(Choice{Index: 0, Delta: Delta{}, FinishReason: "stop"})
	fmt.Fprintf(w, "data: [DONE]\n\n")
	flusher.Flush()

	// Report the request.
	duration := time.Since(startTime)
	recordRequestStats(startTime, path, http.StatusOK)
	addLiveRequest("POST", path, http.StatusOK, duration, "", userAgent)
}

// writeSSEChunk writes chunk to w as one server-sent event of the form
// "data: <json>\n\n". The caller is responsible for flushing.
//
// If chunk cannot be serialized, the failure is logged and nothing is written,
// so the client never receives an event with an empty data field.
func writeSSEChunk(w http.ResponseWriter, chunk OpenAIResponse) {
	data, err := json.Marshal(chunk)
	if err != nil {
		debugLog("SSE chunk序列化失败: %v", err)
		return
	}
	fmt.Fprintf(w, "data: %s\n\n", data)
}

// handleNonStreamResponseWithIDs consumes the upstream SSE stream to its end
// and answers the client with a single OpenAI-style "chat.completion" JSON
// document.
//
// Content deltas from every phase are concatenated (thinking-phase deltas pass
// through transformThinkingContent). Token usage is reported as zero. If the
// upstream call fails, returns a non-200 status, or reports an error before
// any content was collected, the client receives 502 Bad Gateway. Every
// outcome is reported through recordRequestStats and addLiveRequest.
func handleNonStreamResponseWithIDs(w http.ResponseWriter, upstreamReq UpstreamRequest, chatID string, authToken string, startTime time.Time, path string, clientIP, userAgent string) {
	debugLog("开始处理非流式响应 (chat_id=%s)", chatID)

	// failBadGateway answers with 502 and performs the failure bookkeeping.
	failBadGateway := func(msg string) {
		http.Error(w, msg, http.StatusBadGateway)
		duration := time.Since(startTime)
		recordRequestStats(startTime, path, http.StatusBadGateway)
		addLiveRequest("POST", path, http.StatusBadGateway, duration, "", userAgent)
	}

	resp, err := callUpstreamWithHeaders(upstreamReq, chatID, authToken)
	if err != nil {
		debugLog("调用上游失败: %v", err)
		failBadGateway("Failed to call upstream")
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		debugLog("上游返回错误状态: %d", resp.StatusCode)
		// The error body is only read when it is going to be logged.
		if DEBUG_MODE {
			body, _ := io.ReadAll(resp.Body)
			debugLog("上游错误响应: %s", string(body))
		}
		failBadGateway("Upstream error")
		return
	}

	// Collect the whole answer (strategy 2: thinking and answer are both
	// included, thinking content is transformed). The buffer limit is raised
	// above bufio.Scanner's 64 KiB default so that a long data line does not
	// cut the response short.
	var fullContent strings.Builder
	scanner := bufio.NewScanner(resp.Body)
	scanner.Buffer(make([]byte, 0, 64*1024), 1<<20)
	debugLog("开始收集完整响应内容")
	lineCount := 0
	upstreamFailed := false

	for scanner.Scan() {
		line := scanner.Text()
		lineCount++

		debugLog("收到原始行[%d]: %s", lineCount, line)

		if !strings.HasPrefix(line, "data: ") {
			continue
		}

		dataStr := strings.TrimPrefix(line, "data: ")
		if dataStr == "" {
			continue
		}

		debugLog("解析SSE数据: %s", dataStr)

		var upstreamData UpstreamData
		if err := json.Unmarshal([]byte(dataStr), &upstreamData); err != nil {
			debugLog("JSON解析失败: %v", err)
			continue
		}

		debugLog("解析成功 - type:%s phase:%s content_len:%d done:%v",
			upstreamData.Type, upstreamData.Data.Phase,
			len(upstreamData.Data.DeltaContent), upstreamData.Data.Done)

		// An error may be reported at the top level, in data, or in data's
		// inner object; the first one found stops the collection.
		errObj := upstreamData.Error
		if errObj == nil {
			errObj = upstreamData.Data.Error
		}
		if errObj == nil && upstreamData.Data.Inner != nil {
			errObj = upstreamData.Data.Inner.Error
		}
		if errObj != nil {
			debugLog("上游错误: code=%d, detail=%s", errObj.Code, errObj.Detail)
			upstreamFailed = true
			break
		}

		if out := upstreamData.Data.DeltaContent; out != "" {
			if upstreamData.Data.Phase == "thinking" {
				out = transformThinkingContent(out)
			}
			// The transformation may leave nothing to add.
			if out != "" {
				debugLog("添加内容: %s", out)
				fullContent.WriteString(out)
			}
		}

		if upstreamData.Data.Done || upstreamData.Data.Phase == "done" {
			debugLog("检测到完成信号，停止收集")
			break
		}
	}

	if err := scanner.Err(); err != nil {
		debugLog("扫描器错误: %v", err)
	}

	debugLog("扫描器共处理%d行", lineCount)

	finalContent := fullContent.String()
	debugLog("内容收集完成，最终长度: %d", len(finalContent))

	// An upstream error with nothing collected is a failure, not an empty
	// successful completion. Partial content is still returned.
	if upstreamFailed && finalContent == "" {
		failBadGateway("Upstream error")
		return
	}

	// Build the complete response.
	response := OpenAIResponse{
		ID:      fmt.Sprintf("chatcmpl-%d", time.Now().Unix()),
		Object:  "chat.completion",
		Created: time.Now().Unix(),
		Model:   MODEL_NAME,
		Choices: []Choice{
			{
				Index: 0,
				Message: Message{
					Role:    "assistant",
					Content: finalContent,
				},
				FinishReason: "stop",
			},
		},
		Usage: Usage{
			PromptTokens:     0,
			CompletionTokens: 0,
			TotalTokens:      0,
		},
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(response); err != nil {
		// The status line has already been sent, so the failure can only be logged.
		debugLog("非流式响应写入失败: %v", err)
	}
	debugLog("非流式响应发送完成")

	// Report the request.
	duration := time.Since(startTime)
	recordRequestStats(startTime, path, http.StatusOK)
	addLiveRequest("POST", path, http.StatusOK, duration, "", userAgent)
}