// main.go
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"math/rand"
"net/http"
"regexp"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// ==================== Data Structures ====================
// OpenAI API data structures
type ChatCompletionRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
MaxTokens int `json:"max_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Stream bool `json:"stream,omitempty"`
}
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
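// For orientation, a minimal request body accepted by handleChatCompletions could
// look like the following (illustrative values only):
//
//  {
//    "model": "qwen3-235b-a22b",
//    "messages": [{"role": "user", "content": "Hello"}],
//    "stream": false
//  }
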
type ChatCompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []Choice `json:"choices"`
Usage Usage `json:"usage"`
}
type Choice struct {
Index int `json:"index"`
Message Message `json:"message"`
FinishReason string `json:"finish_reason"`
}
type Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type ChatCompletionChunk struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []StreamChoice `json:"choices"`
}
type StreamChoice struct {
Index int `json:"index"`
Delta DeltaMessage `json:"delta"`
FinishReason *string `json:"finish_reason"`
}
type DeltaMessage struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
}
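// Each SSE event written by handleStreamResponse serializes one ChatCompletionChunk.
// An illustrative event (field values are examples only):
//
//  data: {"id":"chatcmpl-...","object":"chat.completion.chunk","created":1700000000,"model":"qwen3-235b-a22b","choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},"finish_reason":null}]}
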
type ModelsResponse struct {
Object string `json:"object"`
Data []ModelObject `json:"data"`
}
// ModelObject describes one entry in the /models response; the name and
// description fields extend the standard OpenAI model object.
type ModelObject struct {
ID string `json:"id"`
Name string `json:"name"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
Description string `json:"description"`
}
type ErrorResponse struct {
Error ErrorDetail `json:"error"`
}
type ErrorDetail struct {
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code,omitempty"`
}
// Qwen API data structures
type QwenChatRequest struct {
Stream bool `json:"stream"`
IncrementalOutput bool `json:"incremental_output"`
ChatID string `json:"chat_id"`
ChatMode string `json:"chat_mode"`
Model string `json:"model"`
ParentID *string `json:"parent_id"`
Messages []QwenMessage `json:"messages"`
}
type QwenMessage struct {
FID string `json:"fid"`
ParentID *string `json:"parentId"`
ChildrenIDs []string `json:"childrenIds"`
Role string `json:"role"`
Content string `json:"content"`
UserAction string `json:"user_action"`
Files []interface{} `json:"files"`
Models []string `json:"models"`
ChatType string `json:"chat_type"`
FeatureConfig QwenFeatureConfig `json:"feature_config"`
Extra map[string]interface{} `json:"extra"`
SubChatType string `json:"sub_chat_type"`
}
type QwenFeatureConfig struct {
ThinkingEnabled bool `json:"thinking_enabled"`
OutputSchema string `json:"output_schema"`
ThinkingBudget int `json:"thinking_budget"`
}
type QwenChatResponse struct {
Choices []QwenChoice `json:"choices"`
}
type QwenChoice struct {
Delta QwenDelta `json:"delta"`
}
type QwenDelta struct {
Phase string `json:"phase,omitempty"`
Content string `json:"content,omitempty"`
}
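// StreamChat parses the upstream response as server-sent events. Judging from the
// parsing code, a content-bearing line is assumed to look roughly like this
// (hypothetical example; the upstream format is not documented here):
//
//  data: {"choices":[{"delta":{"phase":"answer","content":"Hello"}}]}
//
// Chunks whose delta.phase is "think" are filtered out before forwarding.
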
type QwenNewChatRequest struct {
Title string `json:"title"`
Models []string `json:"models"`
ChatMode string `json:"chat_mode"`
ChatType string `json:"chat_type"`
Timestamp int64 `json:"timestamp"`
}
type QwenNewChatResponse struct {
Success bool `json:"success"`
Data struct {
ID string `json:"id"`
} `json:"data"`
}
type StreamChunk struct {
Content string
Phase string
}
// ==================== Qwen Client ====================
type QwenClient struct {
baseURL string
httpClient *http.Client
}
// NewQwenClient creates a client for the chat.qwen.ai web API.
func NewQwenClient() *QwenClient {
    return &QwenClient{
        baseURL: "https://chat.qwen.ai",
        httpClient: &http.Client{
            // Note: http.Client.Timeout bounds the whole exchange, including reading
            // a streamed response body, so very long streams are cut off by it.
            Timeout: 30 * time.Second,
        },
    }
}
// getMidToken fetches the bx-umidtoken required by the Qwen web API from
// Alibaba's wu.json endpoint.
func (c *QwenClient) getMidToken() (string, error) {
req, err := http.NewRequest("GET", "https://sg-wum.alibaba.com/w/wu.json", nil)
if err != nil {
return "", err
}
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
req.Header.Set("Accept", "*/*")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
req.Header.Set("Referer", c.baseURL)
resp, err := c.httpClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
re := regexp.MustCompile(`(?:umx\.wu|__fycb)\('([^']+)'\)`)
matches := re.FindStringSubmatch(string(body))
if len(matches) < 2 {
return "", fmt.Errorf("failed to extract bx-umidtoken")
}
return matches[1], nil
}
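// Illustrative (hypothetical) fragment of a wu.json response that the regexp in
// getMidToken would capture; the actual payload format is an assumption inferred
// from the pattern, not a documented contract:
//
//  umx.wu('SOME_OPAQUE_TOKEN')
//
// matches[1] would then be "SOME_OPAQUE_TOKEN".
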
// createChat opens a new chat session on chat.qwen.ai and returns its ID.
func (c *QwenClient) createChat(midToken, model string) (string, error) {
payload := QwenNewChatRequest{
        Title:     "新对话", // literally "New conversation": the title given to the new chat session
Models: []string{model},
ChatMode: "normal",
ChatType: "t2t",
Timestamp: time.Now().UnixMilli(),
}
jsonData, _ := json.Marshal(payload)
req, err := http.NewRequest("POST", c.baseURL+"/api/v2/chats/new", bytes.NewBuffer(jsonData))
if err != nil {
return "", err
}
c.setHeaders(req, midToken)
resp, err := c.httpClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
var result QwenNewChatResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return "", err
}
if !result.Success || result.Data.ID == "" {
return "", fmt.Errorf("failed to create chat session")
}
return result.Data.ID, nil
}
// setHeaders applies the browser-like headers the Qwen web API expects,
// including the bx-umidtoken obtained from getMidToken.
func (c *QwenClient) setHeaders(req *http.Request, midToken string) {
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
req.Header.Set("Accept", "*/*")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer")
req.Header.Set("Source", "web")
req.Header.Set("Origin", c.baseURL)
req.Header.Set("Referer", c.baseURL+"/")
req.Header.Set("bx-umidtoken", midToken)
req.Header.Set("bx-v", "2.5.31")
}
// Chat performs a non-streaming completion for a single prompt and returns the
// response content. maxTokens is accepted for OpenAI compatibility but is not
// forwarded to the Qwen API.
func (c *QwenClient) Chat(prompt, model string, maxTokens int) (string, error) {
midToken, err := c.getMidToken()
if err != nil {
return "", fmt.Errorf("get midtoken failed: %w", err)
}
chatID, err := c.createChat(midToken, model)
if err != nil {
return "", fmt.Errorf("create chat failed: %w", err)
}
messageID := uuid.New().String()
payload := QwenChatRequest{
Stream: false,
IncrementalOutput: false,
ChatID: chatID,
ChatMode: "normal",
Model: model,
ParentID: nil,
Messages: []QwenMessage{
{
FID: messageID,
ParentID: nil,
ChildrenIDs: []string{},
Role: "user",
Content: prompt,
UserAction: "chat",
Files: []interface{}{},
Models: []string{model},
ChatType: "t2t",
FeatureConfig: QwenFeatureConfig{
ThinkingEnabled: false,
OutputSchema: "phase",
ThinkingBudget: 81920,
},
Extra: map[string]interface{}{
"meta": map[string]string{
"subChatType": "t2t",
},
},
SubChatType: "t2t",
},
},
}
jsonData, _ := json.Marshal(payload)
url := fmt.Sprintf("%s/api/v2/chat/completions?chat_id=%s", c.baseURL, chatID)
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return "", err
}
c.setHeaders(req, midToken)
resp, err := c.httpClient.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
var result QwenChatResponse
if err := json.Unmarshal(body, &result); err != nil {
return "", err
}
if len(result.Choices) > 0 {
return result.Choices[0].Delta.Content, nil
}
return "", fmt.Errorf("no response content")
}
// StreamChat performs a streaming completion, sending content chunks on
// responseChan as they arrive. It does not close responseChan; the caller is
// responsible for that. maxTokens is accepted for OpenAI compatibility but is
// not forwarded to the Qwen API.
func (c *QwenClient) StreamChat(prompt, model string, maxTokens int, responseChan chan<- StreamChunk) error {
midToken, err := c.getMidToken()
if err != nil {
return fmt.Errorf("get midtoken failed: %w", err)
}
chatID, err := c.createChat(midToken, model)
if err != nil {
return fmt.Errorf("create chat failed: %w", err)
}
messageID := uuid.New().String()
payload := QwenChatRequest{
Stream: true,
IncrementalOutput: true,
ChatID: chatID,
ChatMode: "normal",
Model: model,
ParentID: nil,
Messages: []QwenMessage{
{
FID: messageID,
ParentID: nil,
ChildrenIDs: []string{},
Role: "user",
Content: prompt,
UserAction: "chat",
Files: []interface{}{},
Models: []string{model},
ChatType: "t2t",
FeatureConfig: QwenFeatureConfig{
ThinkingEnabled: false,
OutputSchema: "phase",
ThinkingBudget: 81920,
},
Extra: map[string]interface{}{
"meta": map[string]string{
"subChatType": "t2t",
},
},
SubChatType: "t2t",
},
},
}
jsonData, _ := json.Marshal(payload)
url := fmt.Sprintf("%s/api/v2/chat/completions?chat_id=%s", c.baseURL, chatID)
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return err
}
c.setHeaders(req, midToken)
resp, err := c.httpClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
reader := bufio.NewReader(resp.Body)
for {
line, err := reader.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return err
}
line = strings.TrimSpace(line)
if line == "" {
continue
}
        // Skip bare JSON status lines that are not SSE "data:" events.
        if strings.HasPrefix(line, "{") {
            continue
        }
if strings.HasPrefix(line, "data: ") {
data := strings.TrimPrefix(line, "data: ")
if data == "[DONE]" {
break
}
var chunk QwenChatResponse
if err := json.Unmarshal([]byte(data), &chunk); err != nil {
continue
}
if len(chunk.Choices) > 0 {
delta := chunk.Choices[0].Delta
if delta.Content != "" && delta.Phase != "think" {
responseChan <- StreamChunk{
Content: delta.Content,
Phase: delta.Phase,
}
}
}
}
}
return nil
}
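// A minimal usage sketch for StreamChat, mirroring how handleStreamResponse drives
// it (variable names are illustrative):
//
//  client := NewQwenClient()
//  ch := make(chan StreamChunk, 100)
//  go func() {
//      defer close(ch)
//      if err := client.StreamChat("Hello", "qwen3-235b-a22b", 2048, ch); err != nil {
//          log.Println(err)
//      }
//  }()
//  for chunk := range ch {
//      fmt.Print(chunk.Content)
//  }
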
// ==================== Utility Functions ====================
// generateID produces a pseudo-random identifier in OpenAI's "chatcmpl-" style.
func generateID() string {
    return fmt.Sprintf("chatcmpl-%d%d", time.Now().Unix(), rand.Intn(10000))
}

// toJSON marshals v to a JSON string, returning an empty string if marshalling fails.
func toJSON(v interface{}) string {
    b, _ := json.Marshal(v)
    return string(b)
}

// estimateTokens gives a rough token count by counting runes; it is a heuristic,
// not a real tokenizer.
func estimateTokens(text string) int {
    runes := []rune(text)
    return len(runes)
}
// ModelInfo holds the ID, display name, and description of a supported Qwen model.
type ModelInfo struct {
ID string
Name string
Description string
}
// GetQwenModels returns the set of Qwen models exposed by this proxy.
func GetQwenModels() []ModelInfo {
return []ModelInfo{
{
ID: "qwen3-235b-a22b",
Name: "Qwen3-235B-A22B-2507",
Description: "最强大的混合专家语言模型",
},
{
ID: "qwen3-30b-a3b",
Name: "Qwen3-30B-A3B-2507",
Description: "一个紧凑且高性能的混合专家(MoE)模型",
},
{
ID: "qwen3-coder-plus",
Name: "Qwen3-Coder",
Description: "一个能够完成长期任务的强大编码代理",
},
{
ID: "qwen-plus-2025-09-11",
Name: "Qwen3-Next-80B-A3B",
Description: "一款采用稀疏 MoE 和混合注意力机制的下一代模型,高效实现旗舰级性能",
},
{
ID: "qwen3-omni-flash",
Name: "Qwen3-Omni-Flash",
Description: "基于 Qwen3 的原生全模态大语言模型",
},
{
ID: "qwen3-vl-30b-a3b",
Name: "Qwen3-VL-30B-A3B",
Description: "一种紧凑且高性能的视觉语言混合专家(MoE)模型",
},
{
ID: "qwen3-vl-plus",
Name: "Qwen3-VL-235B-A22B",
Description: "基于Qwen3的强大多模态语言模型",
},
{
ID: "qwen3-max",
Name: "Qwen3-Max",
Description: "通义千问系列中最强大的语言模型",
},
{
ID: "qwen3-coder-30b-a3b-instruct",
Name: "Qwen3-Coder-Flash",
Description: "闪电般的速度和准确的代码生成",
},
}
}
// ==================== HTTP Handlers ====================
// handleModels serves the OpenAI-compatible model list, including each model's
// display name and description.
func handleModels(c *gin.Context) {
models := GetQwenModels()
response := ModelsResponse{
Object: "list",
Data: make([]ModelObject, 0, len(models)),
}
timestamp := time.Now().Unix()
for _, model := range models {
response.Data = append(response.Data, ModelObject{
ID: model.ID,
Name: model.Name,
Object: "model",
Created: timestamp,
OwnedBy: "qwen",
Description: model.Description,
})
}
c.JSON(200, response)
}
// handleChatCompletions validates an OpenAI-style chat completion request and
// dispatches it to the streaming or non-streaming handler.
func handleChatCompletions(c *gin.Context) {
var req ChatCompletionRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(400, ErrorResponse{
Error: ErrorDetail{
Message: "Invalid request body: " + err.Error(),
Type: "invalid_request_error",
},
})
return
}
if len(req.Messages) == 0 {
c.JSON(400, ErrorResponse{
Error: ErrorDetail{
Message: "messages is required and cannot be empty",
Type: "invalid_request_error",
},
})
return
}
    // Apply defaults.
if req.Model == "" {
req.Model = "qwen3-235b-a22b"
}
if req.MaxTokens == 0 {
req.MaxTokens = 2048
}
    // Use only the most recent user message as the prompt; earlier history is not forwarded.
var userPrompt string
for i := len(req.Messages) - 1; i >= 0; i-- {
if req.Messages[i].Role == "user" {
userPrompt = req.Messages[i].Content
break
}
}
if userPrompt == "" {
c.JSON(400, ErrorResponse{
Error: ErrorDetail{
Message: "No user message found in messages array",
Type: "invalid_request_error",
},
})
return
}
client := NewQwenClient()
if req.Stream {
handleStreamResponse(c, client, userPrompt, req.Model, req.MaxTokens)
} else {
handleNonStreamResponse(c, client, userPrompt, req.Model, req.MaxTokens)
}
}
// handleStreamResponse proxies the completion as OpenAI-style server-sent events.
func handleStreamResponse(c *gin.Context, client *QwenClient, prompt, model string, maxTokens int) {
c.Header("Content-Type", "text/event-stream")
c.Header("Cache-Control", "no-cache")
c.Header("Connection", "keep-alive")
responseChan := make(chan StreamChunk, 100)
errorChan := make(chan error, 1)
go func() {
defer close(responseChan)
defer close(errorChan)
err := client.StreamChat(prompt, model, maxTokens, responseChan)
if err != nil {
errorChan <- err
}
}()
chatID := generateID()
    for chunk := range responseChan {
        response := ChatCompletionChunk{
            ID:      chatID,
            Object:  "chat.completion.chunk",
            Created: time.Now().Unix(),
            Model:   model,
            Choices: []StreamChoice{
                {
                    Index: 0,
                    Delta: DeltaMessage{
                        Role:    "assistant",
                        Content: chunk.Content,
                    },
                    FinishReason: nil,
                },
            },
        }
        // Forward the chunk as an SSE data event.
        c.Data(200, "text/plain", []byte("data: "+toJSON(response)+"\n\n"))
        c.Writer.Flush()
    }
    // responseChan is closed, so the producer goroutine has finished; errorChan was
    // closed just before it (deferred closes run LIFO) and still holds any buffered
    // error. Reading it here avoids the race in which a closed responseChan could be
    // selected first and the error silently dropped.
    if err := <-errorChan; err != nil {
        errorResponse := ErrorResponse{
            Error: ErrorDetail{
                Message: err.Error(),
                Type:    "server_error",
            },
        }
        // Emit the error as an SSE data event and terminate the stream.
        c.Data(200, "text/plain", []byte("data: "+toJSON(errorResponse)+"\n\n"))
        c.Writer.Flush()
        return
    }
    // Send the terminating SSE marker.
    c.Data(200, "text/plain", []byte("data: [DONE]\n\n"))
    c.Writer.Flush()
}
// handleNonStreamResponse proxies a single completion and wraps it in an
// OpenAI-style ChatCompletionResponse with estimated token usage.
func handleNonStreamResponse(c *gin.Context, client *QwenClient, prompt, model string, maxTokens int) {
content, err := client.Chat(prompt, model, maxTokens)
if err != nil {
c.JSON(500, ErrorResponse{
Error: ErrorDetail{
Message: err.Error(),
Type: "server_error",
},
})
return
}
response := ChatCompletionResponse{
ID: generateID(),
Object: "chat.completion",
Created: time.Now().Unix(),
Model: model,
Choices: []Choice{
{
Index: 0,
Message: Message{
Role: "assistant",
Content: content,
},
FinishReason: "stop",
},
},
Usage: Usage{
PromptTokens: estimateTokens(prompt),
CompletionTokens: estimateTokens(content),
TotalTokens: estimateTokens(prompt) + estimateTokens(content),
},
}
c.JSON(200, response)
}
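// An illustrative non-streaming response produced by this handler (field values
// are examples only):
//
//  {
//    "id": "chatcmpl-17000000005678",
//    "object": "chat.completion",
//    "created": 1700000000,
//    "model": "qwen3-235b-a22b",
//    "choices": [{"index": 0, "message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
//    "usage": {"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7}
//  }
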
// ==================== Main ====================
func main() {
gin.SetMode(gin.ReleaseMode)
r := gin.Default()
r.SetTrustedProxies(nil)
    // Health check
r.GET("/health", func(c *gin.Context) {
c.JSON(200, gin.H{
"status": "ok",
"service": "qwen-openai-proxy",
})
})
    // OpenAI-compatible endpoints
v1 := r.Group("/v1")
{
v1.GET("/models", handleModels)
v1.POST("/chat/completions", handleChatCompletions)
}
v2 := r.Group("/api/v1")
{
v2.GET("/models", handleModels)
v2.POST("/chat/completions", handleChatCompletions)
}
v3 := r.Group("/hf/v1")
{
v3.GET("/models", handleModels)
v3.POST("/chat/completions", handleChatCompletions)
}
log.Println("🚀 Qwen OpenAI Proxy Server starting on :7860")
log.Println("📋 Available endpoints:")
log.Println(" - GET /health")
log.Println(" - GET /v1/models")
log.Println(" - POST /v1/chat/completions")
log.Println(" - GET /api/v1/models")
log.Println(" - POST /api/v1/chat/completions")
log.Println(" - GET /hf/v1/models")
log.Println(" - POST /hf/v1/chat/completions")
if err := r.Run(":7860"); err != nil {
log.Fatalf("❌ Failed to start server: %v", err)
}
}
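
// Example requests against a locally running instance (illustrative):
//
//  curl http://localhost:7860/v1/models
//
//  curl http://localhost:7860/v1/chat/completions \
//    -H "Content-Type: application/json" \
//    -d '{"model":"qwen3-max","messages":[{"role":"user","content":"Hello"}],"stream":true}'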