Spaces:

Moge-Row
/

Row-proxy

Paused

ss22345 committed on Mar 14

Commit

8646505

1 Parent(s): 5a55e77

fix: improve tool calling reliability with multi-format parsing and Delta pointer fix

- Enhance system prompt with Chinese instructions and few-shot examples for
more reliable <tool_call> output from GLM models
- Add fallback parsing for [TOOL]...[/TOOL], [TOOL_CALL]...[/TOOL_CALL],
and markdown JSON block formats in prompt injection mode
- Change Choice.Delta to *Delta pointer so omitempty correctly omits the
field in non-streaming responses (fixes extra "delta":{} in JSON)
- Convert tool_calls/tool messages to plain text for upstream z.ai API
which doesn't support native tools field
- Add comprehensive tests for all new parsing formats and serialization

Files changed (9) hide show

internal/filter/prompttool.go +129 -0
internal/filter/prompttool_test.go +224 -0
internal/handler/chat.go +130 -12
internal/handler/chat_test.go +73 -0
internal/model/types.go +1 -1
internal/model/types_test.go +40 -1
internal/tools/prompt.go +96 -0
internal/tools/prompt_test.go +132 -0
internal/upstream/client.go +51 -20

internal/filter/prompttool.go ADDED Viewed

	@@ -0,0 +1,129 @@

+package filter
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+	"github.com/google/uuid"
+	"zai-proxy/internal/model"
+)
+// promptToolCallPattern matches <tool_call>...</tool_call> blocks; group 1 captures the text between the tags.
+var promptToolCallPattern = regexp.MustCompile(`<tool_call>\s*([\s\S]*?)\s*</tool_call>`)
+// altToolCallPattern matches the [TOOL]...[/TOOL] and [TOOL_CALL]...[/TOOL_CALL] formats.
+var altToolCallPattern = regexp.MustCompile(`\[TOOL(?:_CALL)?\]\s*([\s\S]*?)\s*\[/TOOL(?:_CALL)?\]`)
+// jsonBlockPattern matches a tool call inside a markdown ```json code block: a {...} object that contains a "name" key.
+var jsonBlockPattern = regexp.MustCompile("```json\\s*\\n(\\{[\\s\\S]*?\"name\"[\\s\\S]*?\\})\\s*\\n```")
+// allToolCallPatterns lists every tool call pattern in priority order.
+var allToolCallPatterns = []*regexp.Regexp{
+	promptToolCallPattern, // <tool_call>: highest priority
+	altToolCallPattern,    // [TOOL] / [TOOL_CALL]
+	jsonBlockPattern,      // ```json ... ```
+}
+// ExtractPromptToolCalls extracts every tool call block from content. It
+// recognises all formats listed in allToolCallPatterns and returns the text
+// with the parsed blocks removed, together with the parsed tool calls.
+//
+// Patterns are tried in priority order. The returned calls are grouped in that
+// same order and keep document order within each format. A block whose payload
+// cannot be parsed as a tool call is left in the text untouched, so unrelated
+// content (for example an ordinary ```json code block) is never dropped. If
+// nothing parses, content is returned unchanged together with a nil slice.
+func ExtractPromptToolCalls(content string) (cleanContent string, toolCalls []model.ToolCall) {
+	var allCalls []model.ToolCall
+	cleaned := content
+	// Try each format in priority order.
+	for _, pattern := range allToolCallPatterns {
+		matches := pattern.FindAllStringSubmatchIndex(cleaned, -1)
+		var patternCalls []model.ToolCall
+		// Walk the matches back to front so that cutting a block out of cleaned
+		// does not shift the indices of the matches still to be processed.
+		for i := len(matches) - 1; i >= 0; i-- {
+			match := matches[i]
+			fullStart, fullEnd := match[0], match[1]
+			groupStart, groupEnd := match[2], match[3]
+			calls := parsePromptToolCallJSON(cleaned[groupStart:groupEnd])
+			if len(calls) == 0 {
+				// Not a usable tool call: keep the original text in place.
+				continue
+			}
+			// Prepend, because the matches are visited in reverse document order.
+			patternCalls = append(calls, patternCalls...)
+			cleaned = cleaned[:fullStart] + cleaned[fullEnd:]
+		}
+		// Higher-priority formats come first in the result.
+		allCalls = append(allCalls, patternCalls...)
+	}
+	if len(allCalls) == 0 {
+		return content, nil
+	}
+	// Collapse the blank lines left behind by the removed blocks.
+	cleaned = strings.TrimSpace(cleaned)
+	for strings.Contains(cleaned, "\n\n\n") {
+		cleaned = strings.ReplaceAll(cleaned, "\n\n\n", "\n\n")
+	}
+	// Assign an ID, a sequential index and the type to every tool call.
+	for i := range allCalls {
+		if allCalls[i].ID == "" {
+			allCalls[i].ID = fmt.Sprintf("call_%s", uuid.New().String()[:24])
+		}
+		allCalls[i].Index = i
+		allCalls[i].Type = "function"
+	}
+	return cleaned, allCalls
+}
+// parsePromptToolCallJSON parses the JSON payload found inside a tool call
+// block. The expected shape is {"name": "xxx", "arguments": {...}}.
+//
+// It returns a single-element slice on success and nil when content is empty,
+// is not valid JSON, or has no "name". The returned Arguments is always a JSON
+// text: an "arguments" value that is itself a JSON string is unquoted, and a
+// missing, null or empty value is normalised to "{}" so that clients can always
+// decode it as an object.
+func parsePromptToolCallJSON(content string) []model.ToolCall {
+	content = strings.TrimSpace(content)
+	if content == "" {
+		return nil
+	}
+	var call struct {
+		Name      string          `json:"name"`
+		Arguments json.RawMessage `json:"arguments"`
+	}
+	if err := json.Unmarshal([]byte(content), &call); err != nil || call.Name == "" {
+		return nil
+	}
+	argsStr := string(call.Arguments)
+	if len(argsStr) > 0 && argsStr[0] == '"' {
+		// arguments was supplied as a JSON string: unquote it to get the inner JSON.
+		var s string
+		if json.Unmarshal(call.Arguments, &s) == nil {
+			argsStr = s
+		}
+	}
+	// Missing or null arguments must still serialise as a JSON object.
+	if trimmed := strings.TrimSpace(argsStr); trimmed == "" || trimmed == "null" {
+		argsStr = "{}"
+	}
+	return []model.ToolCall{{
+		Function: model.FunctionCall{
+			Name:      call.Name,
+			Arguments: argsStr,
+		},
+	}}
+}
+// HasPromptToolCallOpen reports whether content holds an unclosed tool call tag, i.e. more opening than closing tags for any one format.
+func HasPromptToolCallOpen(content string) bool {
+	// <tool_call>
+	if strings.Count(content, "<tool_call>") > strings.Count(content, "</tool_call>") {
+		return true
+	}
+	// [TOOL] / [TOOL_CALL]
+	if strings.Count(content, "[TOOL]") > strings.Count(content, "[/TOOL]") {
+		return true
+	}
+	if strings.Count(content, "[TOOL_CALL]") > strings.Count(content, "[/TOOL_CALL]") {
+		return true
+	}
+	return false
+}

internal/filter/prompttool_test.go ADDED Viewed

	@@ -0,0 +1,224 @@

+package filter
+import (
+	"testing"
+)
+func TestExtractPromptToolCalls_NoToolCall(t *testing.T) { // plain text must come back unchanged with no tool calls
+	content := "Hello, this is a normal response."
+	clean, calls := ExtractPromptToolCalls(content)
+	if clean != content {
+		t.Errorf("expected content unchanged, got %q", clean)
+	}
+	if len(calls) != 0 {
+		t.Error("expected no tool calls")
+	}
+}
+func TestExtractPromptToolCalls_SingleCall(t *testing.T) { // one <tool_call> block surrounded by ordinary text
+	content := `Here is the result:
+<tool_call>{"name": "get_weather", "arguments": {"city": "Beijing"}}</tool_call>
+Done.`
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "get_weather" {
+		t.Errorf("expected name get_weather, got %s", calls[0].Function.Name)
+	}
+	if calls[0].Function.Arguments != `{"city":"Beijing"}` && calls[0].Function.Arguments != `{"city": "Beijing"}` { // compact or original spacing are both accepted
+		t.Errorf("unexpected arguments: %s", calls[0].Function.Arguments)
+	}
+	if calls[0].ID == "" {
+		t.Error("expected auto-generated ID")
+	}
+	if calls[0].Type != "function" {
+		t.Errorf("expected type function, got %s", calls[0].Type)
+	}
+	// Clean content should not contain tool_call tags
+	if clean == content {
+		t.Error("expected content to be cleaned")
+	}
+	if contains := "Here is the result:"; !containsStr(clean, contains) {
+		t.Errorf("expected clean content to contain %q", contains)
+	}
+	if containsStr(clean, "<tool_call>") {
+		t.Error("clean content should not contain <tool_call>")
+	}
+}
+func TestExtractPromptToolCalls_MultipleCalls(t *testing.T) { // two <tool_call> blocks must be returned in document order with indices 0 and 1
+	content := `<tool_call>{"name": "func_a", "arguments": {"x": 1}}</tool_call>
+<tool_call>{"name": "func_b", "arguments": {"y": 2}}</tool_call>`
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 2 {
+		t.Fatalf("expected 2 tool calls, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "func_a" {
+		t.Errorf("expected first call func_a, got %s", calls[0].Function.Name)
+	}
+	if calls[1].Function.Name != "func_b" {
+		t.Errorf("expected second call func_b, got %s", calls[1].Function.Name)
+	}
+	if calls[0].Index != 0 || calls[1].Index != 1 {
+		t.Error("expected sequential indices")
+	}
+	if clean != "" {
+		t.Errorf("expected empty clean content, got %q", clean)
+	}
+}
+func TestExtractPromptToolCalls_OnlyToolCall(t *testing.T) { // content that is nothing but a tool call leaves empty clean text
+	content := `<tool_call>{"name": "calculate", "arguments": {"expression": "2+2"}}</tool_call>`
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "calculate" {
+		t.Errorf("expected calculate, got %s", calls[0].Function.Name)
+	}
+	if clean != "" {
+		t.Errorf("expected empty clean content, got %q", clean)
+	}
+}
+func TestExtractPromptToolCalls_WithWhitespace(t *testing.T) { // whitespace and newlines between the tags and the JSON are tolerated
+	content := `<tool_call>
+  {"name": "test", "arguments": {}}
+</tool_call>`
+	_, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "test" {
+		t.Errorf("expected test, got %s", calls[0].Function.Name)
+	}
+}
+func TestHasPromptToolCallOpen(t *testing.T) { // table-driven: open and closed tags for all three tag formats
+	tests := []struct {
+		content  string
+		expected bool
+	}{
+		{"hello", false},
+		{"<tool_call>{}", true},
+		{"<tool_call>{}</tool_call>", false},
+		{"text <tool_call>partial...", true},
+		{"<tool_call>a</tool_call><tool_call>b", true},
+		{"[TOOL]partial", true},
+		{"[TOOL]{\"name\":\"x\"}[/TOOL]", false},
+		{"[TOOL_CALL]partial", true},
+		{"[TOOL_CALL]{\"name\":\"x\"}[/TOOL_CALL]", false},
+	}
+	for _, tt := range tests {
+		if got := HasPromptToolCallOpen(tt.content); got != tt.expected {
+			t.Errorf("HasPromptToolCallOpen(%q) = %v, want %v", tt.content, got, tt.expected)
+		}
+	}
+}
+// ===== [TOOL]...[/TOOL] format =====
+func TestExtractPromptToolCalls_AltToolFormat(t *testing.T) { // a lone [TOOL] block is parsed and removed from the text
+	content := `[TOOL]{"name": "get_weather", "arguments": {"city": "上海"}}[/TOOL]`
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "get_weather" {
+		t.Errorf("expected get_weather, got %s", calls[0].Function.Name)
+	}
+	if calls[0].Type != "function" {
+		t.Errorf("expected type function, got %s", calls[0].Type)
+	}
+	if clean != "" {
+		t.Errorf("expected empty clean, got %q", clean)
+	}
+}
+func TestExtractPromptToolCalls_AltToolCallFormat(t *testing.T) { // a [TOOL_CALL] block is parsed and the text before it is kept
+	content := `好的，我来调用工具。
+[TOOL_CALL]{"name": "create_file", "arguments": {"filename": "test.txt", "content": "hello"}}[/TOOL_CALL]`
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "create_file" {
+		t.Errorf("expected create_file, got %s", calls[0].Function.Name)
+	}
+	if !containsStr(clean, "好的") {
+		t.Errorf("expected clean to contain surrounding text, got %q", clean)
+	}
+}
+// ===== markdown JSON block format =====
+func TestExtractPromptToolCalls_JsonBlockFormat(t *testing.T) { // a ```json fenced tool call is parsed and the fence is removed
+	content := "我来调用工具：\n```json\n{\"name\": \"get_weather\", \"arguments\": {\"city\": \"北京\"}}\n```\n"
+	clean, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "get_weather" {
+		t.Errorf("expected get_weather, got %s", calls[0].Function.Name)
+	}
+	if containsStr(clean, "```") {
+		t.Errorf("expected clean to not contain code block, got %q", clean)
+	}
+}
+// ===== Mixed formats =====
+func TestExtractPromptToolCalls_MixedFormats(t *testing.T) { // a <tool_call> block and a [TOOL] block in the same content
+	content := `<tool_call>{"name": "func_a", "arguments": {}}</tool_call>
+[TOOL]{"name": "func_b", "arguments": {}}[/TOOL]`
+	_, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 2 {
+		t.Fatalf("expected 2 tool calls, got %d", len(calls))
+	}
+	// <tool_call> is parsed first; only the set of names is asserted here, not the order
+	names := map[string]bool{}
+	for _, c := range calls {
+		names[c.Function.Name] = true
+	}
+	if !names["func_a"] || !names["func_b"] {
+		t.Error("expected both func_a and func_b to be extracted")
+	}
+}
+// ===== <tool_call> takes priority over the other formats =====
+func TestExtractPromptToolCalls_ToolCallPriority(t *testing.T) { // NOTE(review): the input holds only a <tool_call> block, so no competing format is exercised
+	content := `<tool_call>{"name": "correct", "arguments": {}}</tool_call>`
+	_, calls := ExtractPromptToolCalls(content)
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if calls[0].Function.Name != "correct" {
+		t.Errorf("expected correct, got %s", calls[0].Function.Name)
+	}
+}
+func containsStr(s, substr string) bool { // reports whether substr occurs in s; test helper built on findSubstr
+	return len(s) >= len(substr) && (s == substr || len(s) > 0 && findSubstr(s, substr))
+}
+func findSubstr(s, substr string) bool { // byte-wise scan: compares substr against s at every start offset
+	for i := 0; i <= len(s)-len(substr); i++ {
+		if s[i:i+len(substr)] == substr {
+			return true
+		}
+	}
+	return false
+}

internal/handler/chat.go CHANGED Viewed

@@ -94,6 +94,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 	totalContentOutputLength := 0
 	hasToolCalls := false
 	var collectedToolCalls []model.ToolCall
 	for scanner.Scan() {
 		line := scanner.Text()
@@ -145,7 +146,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
-							Delta:        model.Delta{ReasoningContent: reasoningContent},
 							FinishReason: nil,
 						}},
 					}
@@ -182,7 +183,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
-							Delta:        model.Delta{Content: textBeforeBlock},
 							FinishReason: nil,
 						}},
 					}
@@ -209,7 +210,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
-							Delta:        model.Delta{Content: textBeforeBlock},
 							FinishReason: nil,
 						}},
 					}
@@ -247,7 +248,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index: 0,
-							Delta: model.Delta{
 								ToolCalls: []model.ToolCall{tc},
 							},
 							FinishReason: nil,
@@ -270,7 +271,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
-					Delta:        model.Delta{Content: pendingSourcesMarkdown},
 					FinishReason: nil,
 				}},
 			}
@@ -288,7 +289,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
-					Delta:        model.Delta{Content: pendingImageSearchMarkdown},
 					FinishReason: nil,
 				}},
 			}
@@ -313,7 +314,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 					Model:   modelName,
 					Choices: []model.Choice{{
 						Index:        0,
-						Delta:        model.Delta{ReasoningContent: processedRemaining},
 						FinishReason: nil,
 					}},
 				}
@@ -332,7 +333,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
-					Delta:        model.Delta{ReasoningContent: pendingSourcesMarkdown},
 					FinishReason: nil,
 				}},
 			}
@@ -382,7 +383,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
-					Delta:        model.Delta{ReasoningContent: reasoningContent},
 					FinishReason: nil,
 				}},
 			}
@@ -405,6 +406,63 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 			totalContentOutputLength += len([]rune(content))
 		}
 		chunk := model.ChatCompletionChunk{
 			ID:      completionID,
 			Object:  "chat.completion.chunk",
@@ -412,7 +470,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 			Model:   modelName,
 			Choices: []model.Choice{{
 				Index:        0,
-				Delta:        model.Delta{Content: content},
 				FinishReason: nil,
 			}},
 		}
@@ -426,6 +484,39 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 		logger.LogError("[Upstream] scanner error: %v", err)
 	}
 	if remaining := searchRefFilter.Flush(); remaining != "" {
 		hasContent = true
 		chunk := model.ChatCompletionChunk{
@@ -435,7 +526,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 			Model:   modelName,
 			Choices: []model.Choice{{
 				Index:        0,
-				Delta:        model.Delta{Content: remaining},
 				FinishReason: nil,
 			}},
 		}
@@ -459,7 +550,7 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
 		Model:   modelName,
 		Choices: []model.Choice{{
 			Index:        0,
-			Delta:        model.Delta{},
 			FinishReason: &stopReason,
 		}},
 	}
@@ -616,6 +707,15 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
 	fullReasoning := strings.Join(reasoningChunks, "")
 	fullReasoning = searchRefFilter.Process(fullReasoning) + searchRefFilter.Flush()
 	if fullContent == "" && len(collectedToolCalls) == 0 {
 		logger.LogError("Non-stream response 200 but no content received")
 	}
@@ -644,3 +744,21 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(response)
 }

 	totalContentOutputLength := 0
 	hasToolCalls := false
 	var collectedToolCalls []model.ToolCall
+	promptToolBuffer := "" // 用于 prompt 注入模式下缓冲 answer 文本以检测 <tool_call>
 	for scanner.Scan() {
 		line := scanner.Text()
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
+							Delta:        &model.Delta{ReasoningContent: reasoningContent},
 							FinishReason: nil,
 						}},
 					}
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
+							Delta:        &model.Delta{Content: textBeforeBlock},
 							FinishReason: nil,
 						}},
 					}
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index:        0,
+							Delta:        &model.Delta{Content: textBeforeBlock},
 							FinishReason: nil,
 						}},
 					}
 						Model:   modelName,
 						Choices: []model.Choice{{
 							Index: 0,
+							Delta: &model.Delta{
 								ToolCalls: []model.ToolCall{tc},
 							},
 							FinishReason: nil,
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
+					Delta:        &model.Delta{Content: pendingSourcesMarkdown},
 					FinishReason: nil,
 				}},
 			}
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
+					Delta:        &model.Delta{Content: pendingImageSearchMarkdown},
 					FinishReason: nil,
 				}},
 			}
 					Model:   modelName,
 					Choices: []model.Choice{{
 						Index:        0,
+						Delta:        &model.Delta{ReasoningContent: processedRemaining},
 						FinishReason: nil,
 					}},
 				}
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
+					Delta:        &model.Delta{ReasoningContent: pendingSourcesMarkdown},
 					FinishReason: nil,
 				}},
 			}
 				Model:   modelName,
 				Choices: []model.Choice{{
 					Index:        0,
+					Delta:        &model.Delta{ReasoningContent: reasoningContent},
 					FinishReason: nil,
 				}},
 			}
 			totalContentOutputLength += len([]rune(content))
 		}
+		// prompt 注入模式：缓冲 answer 文本，检测 <tool_call> 块
+		if len(tools) > 0 {
+			promptToolBuffer += content
+			// 循环提取完整的 <tool_call>...</tool_call> 块
+			for {
+				openIdx := strings.Index(promptToolBuffer, "<tool_call>")
+				if openIdx == -1 {
+					// 无 <tool_call> 标签，全部安全输出
+					break
+				}
+				// 输出 <tool_call> 之前的安全文本
+				if openIdx > 0 {
+					safeContent := promptToolBuffer[:openIdx]
+					promptToolBuffer = promptToolBuffer[openIdx:]
+					if safeContent != "" {
+						sendContentChunk(w, flusher, completionID, modelName, safeContent)
+					}
+				}
+				// 检查是否有完整的闭合标签
+				closeIdx := strings.Index(promptToolBuffer, "</tool_call>")
+				if closeIdx == -1 {
+					// 未闭合，等待更多数据
+					break
+				}
+				// 提取完整块
+				blockEnd := closeIdx + len("</tool_call>")
+				block := promptToolBuffer[:blockEnd]
+				promptToolBuffer = promptToolBuffer[blockEnd:]
+				// 解析 tool call
+				_, toolCalls := filter.ExtractPromptToolCalls(block)
+				if len(toolCalls) > 0 {
+					collectedToolCalls = append(collectedToolCalls, toolCalls...)
+					hasToolCalls = true
+					for _, tc := range toolCalls {
+						chunk := model.ChatCompletionChunk{
+							ID:      completionID,
+							Object:  "chat.completion.chunk",
+							Created: time.Now().Unix(),
+							Model:   modelName,
+							Choices: []model.Choice{{
+								Index: 0,
+								Delta: &model.Delta{
+									ToolCalls: []model.ToolCall{tc},
+								},
+								FinishReason: nil,
+							}},
+						}
+						data, _ := json.Marshal(chunk)
+						fmt.Fprintf(w, "data: %s\n\n", data)
+						flusher.Flush()
+					}
+				}
+			}
+			continue
+		}
 		chunk := model.ChatCompletionChunk{
 			ID:      completionID,
 			Object:  "chat.completion.chunk",
 			Model:   modelName,
 			Choices: []model.Choice{{
 				Index:        0,
+				Delta:        &model.Delta{Content: content},
 				FinishReason: nil,
 			}},
 		}
 		logger.LogError("[Upstream] scanner error: %v", err)
 	}
+	// prompt 注入模式：flush 缓冲区中剩余的文本
+	if promptToolBuffer != "" {
+		// 尝试最后一次提取 tool calls
+		cleanContent, toolCalls := filter.ExtractPromptToolCalls(promptToolBuffer)
+		if len(toolCalls) > 0 {
+			collectedToolCalls = append(collectedToolCalls, toolCalls...)
+			hasToolCalls = true
+			for _, tc := range toolCalls {
+				chunk := model.ChatCompletionChunk{
+					ID:      completionID,
+					Object:  "chat.completion.chunk",
+					Created: time.Now().Unix(),
+					Model:   modelName,
+					Choices: []model.Choice{{
+						Index: 0,
+						Delta: &model.Delta{
+							ToolCalls: []model.ToolCall{tc},
+						},
+						FinishReason: nil,
+					}},
+				}
+				data, _ := json.Marshal(chunk)
+				fmt.Fprintf(w, "data: %s\n\n", data)
+				flusher.Flush()
+			}
+		}
+		if cleanContent != "" {
+			sendContentChunk(w, flusher, completionID, modelName, cleanContent)
+			hasContent = true
+		}
+		promptToolBuffer = ""
+	}
 	if remaining := searchRefFilter.Flush(); remaining != "" {
 		hasContent = true
 		chunk := model.ChatCompletionChunk{
 			Model:   modelName,
 			Choices: []model.Choice{{
 				Index:        0,
+				Delta:        &model.Delta{Content: remaining},
 				FinishReason: nil,
 			}},
 		}
 		Model:   modelName,
 		Choices: []model.Choice{{
 			Index:        0,
+			Delta:        &model.Delta{},
 			FinishReason: &stopReason,
 		}},
 	}
 	fullReasoning := strings.Join(reasoningChunks, "")
 	fullReasoning = searchRefFilter.Process(fullReasoning) + searchRefFilter.Flush()
+	// prompt 注入模式：从 answer 文本中提取 <tool_call> 块
+	if len(tools) > 0 && len(collectedToolCalls) == 0 {
+		cleanContent, promptToolCalls := filter.ExtractPromptToolCalls(fullContent)
+		if len(promptToolCalls) > 0 {
+			collectedToolCalls = promptToolCalls
+			fullContent = cleanContent
+		}
+	}
 	if fullContent == "" && len(collectedToolCalls) == 0 {
 		logger.LogError("Non-stream response 200 but no content received")
 	}
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(response)
 }
+// sendContentChunk sends one content SSE chunk: a chat.completion.chunk whose delta carries content, written as a "data:" event and flushed.
+func sendContentChunk(w http.ResponseWriter, flusher http.Flusher, completionID, modelName, content string) {
+	chunk := model.ChatCompletionChunk{
+		ID:      completionID,
+		Object:  "chat.completion.chunk",
+		Created: time.Now().Unix(),
+		Model:   modelName,
+		Choices: []model.Choice{{
+			Index:        0,
+			Delta:        &model.Delta{Content: content},
+			FinishReason: nil,
+		}},
+	}
+	data, _ := json.Marshal(chunk) // the marshal error is discarded
+	fmt.Fprintf(w, "data: %s\n\n", data)
+	flusher.Flush()
+}

internal/handler/chat_test.go CHANGED Viewed

@@ -574,3 +574,76 @@ func TestNonStreamResponse_FullFormat(t *testing.T) {
 		t.Errorf("Role = %q", resp.Choices[0].Message.Role)
 	}
 }

 		t.Errorf("Role = %q", resp.Choices[0].Message.Role)
 	}
 }
+// ===== Streaming: prompt-injection mode, <tool_call> inside the answer text =====
+func TestStreamResponse_PromptInjectionToolCall(t *testing.T) { // the streamed output must carry tool_calls and finish_reason "tool_calls"
+	body := newFakeBody(
+		sseEvent("answer", "好的，我来查询。\n", ""),
+		sseEvent("answer", `<tool_call>{"name":"get_weather","arguments":{"city":"北京"}}</tool_call>`, ""),
+		sseEventDone(),
+	)
+	w := httptest.NewRecorder()
+	handleStreamResponse(w, body, "chatcmpl-test", "glm-4.7", dummyTools())
+	result := w.Body.String()
+	if !strings.Contains(result, `"tool_calls"`) {
+		t.Error("missing tool_calls in prompt injection stream")
+	}
+	if !strings.Contains(result, `"get_weather"`) {
+		t.Error("missing function name")
+	}
+	if !strings.Contains(result, `"finish_reason":"tool_calls"`) {
+		t.Error("finish_reason should be tool_calls")
+	}
+}
+// ===== Non-streaming: prompt-injection mode =====
+func TestNonStreamResponse_PromptInjectionToolCall(t *testing.T) { // the tool call moves from the answer text into message.tool_calls
+	body := newFakeBody(
+		sseEvent("answer", "我来查询天气。\n<tool_call>{\"name\":\"get_weather\",\"arguments\":{\"city\":\"上海\"}}</tool_call>", ""),
+		sseEventDone(),
+	)
+	w := httptest.NewRecorder()
+	handleNonStreamResponse(w, body, "chatcmpl-test", "glm-4.7", dummyTools())
+	var resp model.ChatCompletionResponse
+	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	msg := resp.Choices[0].Message
+	if len(msg.ToolCalls) != 1 {
+		t.Fatalf("len(ToolCalls) = %d, want 1", len(msg.ToolCalls))
+	}
+	if msg.ToolCalls[0].Function.Name != "get_weather" {
+		t.Errorf("Function.Name = %q", msg.ToolCalls[0].Function.Name)
+	}
+	if strings.Contains(msg.Content, "<tool_call>") {
+		t.Error("content should not contain <tool_call> tags")
+	}
+	if *resp.Choices[0].FinishReason != "tool_calls" {
+		t.Errorf("FinishReason = %q, want tool_calls", *resp.Choices[0].FinishReason)
+	}
+}
+// ===== Non-streaming: the response must not contain a delta field =====
+func TestNonStreamResponse_NoDeltaField(t *testing.T) { // checks the raw JSON body for the "delta" key
+	body := newFakeBody(
+		sseEvent("answer", "hello", ""),
+		sseEventDone(),
+	)
+	w := httptest.NewRecorder()
+	handleNonStreamResponse(w, body, "chatcmpl-test", "glm-4.7", nil)
+	result := w.Body.String()
+	if strings.Contains(result, `"delta"`) {
+		t.Error("non-streaming response should not contain delta field")
+	}
+}

internal/model/types.go CHANGED Viewed

@@ -160,7 +160,7 @@ type ChatCompletionChunk struct {
 type Choice struct {
 	Index        int          `json:"index"`
-	Delta        Delta        `json:"delta,omitempty"`
 	Message      *MessageResp `json:"message,omitempty"`
 	FinishReason *string      `json:"finish_reason"`
 }

 type Choice struct {
 	Index        int          `json:"index"`
+	Delta        *Delta       `json:"delta,omitempty"` // pointer so that omitempty drops the field when it is nil
 	Message      *MessageResp `json:"message,omitempty"`
 	FinishReason *string      `json:"finish_reason"`
 }

internal/model/types_test.go CHANGED Viewed

@@ -2,6 +2,7 @@ package model
 import (
 	"encoding/json"
 	"testing"
 )
@@ -423,7 +424,7 @@ func TestChunkWithToolCallsFinishReason(t *testing.T) {
 		Model:   "glm-4.7",
 		Choices: []Choice{{
 			Index:        0,
-			Delta:        Delta{},
 			FinishReason: &reason,
 		}},
 	}
@@ -501,3 +502,41 @@ func TestCompletionResponseWithToolCalls(t *testing.T) {
 		t.Errorf("FinishReason = %q", *decoded.Choices[0].FinishReason)
 	}
 }

 import (
 	"encoding/json"
+	"strings"
 	"testing"
 )
 		Model:   "glm-4.7",
 		Choices: []Choice{{
 			Index:        0,
+			Delta:        &Delta{},
 			FinishReason: &reason,
 		}},
 	}
 		t.Errorf("FinishReason = %q", *decoded.Choices[0].FinishReason)
 	}
 }
+// ===== Delta pointer: omitted from the JSON when nil =====
+func TestChoiceDeltaNil_OmittedInJSON(t *testing.T) { // a Choice with only Message set must not serialise a "delta" key
+	reason := "stop"
+	choice := Choice{
+		Index: 0,
+		Message: &MessageResp{
+			Role:    "assistant",
+			Content: "hello",
+		},
+		FinishReason: &reason,
+	}
+	data, _ := json.Marshal(choice)
+	s := string(data)
+	if strings.Contains(s, `"delta"`) {
+		t.Errorf("nil Delta should be omitted, got: %s", s)
+	}
+}
+// ===== Delta pointer: serialised normally when non-nil =====
+func TestChoiceDeltaNotNil_SerializedInJSON(t *testing.T) { // a non-nil Delta must appear in the JSON together with its content
+	choice := Choice{
+		Index: 0,
+		Delta: &Delta{Content: "test content"},
+	}
+	data, _ := json.Marshal(choice)
+	s := string(data)
+	if !strings.Contains(s, `"delta"`) {
+		t.Error("non-nil Delta should appear in JSON")
+	}
+	if !strings.Contains(s, `"test content"`) {
+		t.Error("Delta content should be serialized")
+	}
+}

internal/tools/prompt.go ADDED Viewed

	@@ -0,0 +1,96 @@

+package tools
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+	"zai-proxy/internal/model"
+)
+// BuildToolSystemPrompt converts the list of tool definitions into system prompt text that tells the model to emit tool calls in the <tool_call> format.
+// It returns "" when tools is empty; toolChoice may append a final instruction (the strings "none" / "required", or a map carrying function.name).
+func BuildToolSystemPrompt(tools []model.Tool, toolChoice interface{}) string {
+	if len(tools) == 0 {
+		return ""
+	}
+	var sb strings.Builder
+	sb.WriteString("# 工具调用规则\n\n")
+	sb.WriteString("你可以使用下面列出的工具。当你需要调用工具时，**必须严格使用以下 XML 格式**输出调用请求（不要使用 markdown 代码块、不要使用 [TOOL] 或其他格式）：\n\n")
+	sb.WriteString("<tool_call>{\"name\": \"函数名\", \"arguments\": {\"参数名\": \"参数值\"}}</tool_call>\n\n")
+	sb.WriteString("**重要规则：**\n")
+	sb.WriteString("- 你不能自行执行工具，只能输出 <tool_call> 标签，由系统执行后将结果返回给你\n")
+	sb.WriteString("- 每个工具调用必须独立包裹在 <tool_call></tool_call> 标签中\n")
+	sb.WriteString("- arguments 必须是合法 JSON 对象\n")
+	sb.WriteString("- 不要在 <tool_call> 标签外描述调用参数\n\n")
+	sb.WriteString("## 示例\n\n")
+	sb.WriteString("用户: 帮我创建一个文件 test.txt 内容为 hello\n")
+	sb.WriteString("助手: 好的，我来为您创建文件。\n")
+	sb.WriteString("<tool_call>{\"name\": \"create_file\", \"arguments\": {\"filename\": \"test.txt\", \"content\": \"hello\"}}</tool_call>\n\n")
+	sb.WriteString("用户: 查询北京和上海的天气\n")
+	sb.WriteString("助手: 我来查询这两个城市的天气。\n")
+	sb.WriteString("<tool_call>{\"name\": \"get_weather\", \"arguments\": {\"location\": \"北京\"}}</tool_call>\n")
+	sb.WriteString("<tool_call>{\"name\": \"get_weather\", \"arguments\": {\"location\": \"上海\"}}</tool_call>\n\n")
+	sb.WriteString("## 可用工具\n\n")
+	for _, tool := range tools { // one "### name" section per tool: description, then parameters as JSON
+		sb.WriteString(fmt.Sprintf("### %s\n", tool.Function.Name))
+		if tool.Function.Description != "" {
+			sb.WriteString(fmt.Sprintf("%s\n", tool.Function.Description))
+		}
+		if tool.Function.Parameters != nil {
+			params, err := json.Marshal(tool.Function.Parameters)
+			if err == nil { // parameters that cannot be marshalled are silently left out
+				sb.WriteString(fmt.Sprintf("Parameters: %s\n", string(params)))
+			}
+		}
+		sb.WriteString("\n")
+	}
+	// Handle tool_choice.
+	if toolChoice != nil {
+		switch tc := toolChoice.(type) {
+		case string:
+			switch tc {
+			case "none":
+				sb.WriteString("**禁止调用任何工具，直接回答问题。**\n")
+			case "required":
+				sb.WriteString("**你的回复中必须包含至少一个 <tool_call> 标签。即使你认为不需要调用工具，也必须调用。**\n")
+			// "auto" is the default, no special instruction needed
+			}
+		case map[string]interface{}:
+			// tool_choice = {"type": "function", "function": {"name": "xxx"}}
+			if fn, ok := tc["function"].(map[string]interface{}); ok {
+				if name, ok := fn["name"].(string); ok {
+					sb.WriteString(fmt.Sprintf("**你必须调用工具 \"%s\"，使用 <tool_call> 标签输出调用。**\n", name))
+				}
+			}
+		}
+	}
+	return sb.String()
+}
+// ConvertToolCallToText renders the tool_calls of an assistant message in the
+// <tool_call> text format, one block per call, joined by newlines. It is used
+// in prompt-injection mode to pass historical tool calls to the upstream API.
+//
+// Arguments is embedded as raw JSON when it is valid JSON. An empty value
+// becomes {} and any other text is embedded as a JSON string, so a malformed
+// Arguments value cannot make marshalling fail and produce an empty block that
+// has lost the function name.
+func ConvertToolCallToText(toolCalls []model.ToolCall) string {
+	parts := make([]string, 0, len(toolCalls))
+	for _, tc := range toolCalls {
+		var arguments interface{}
+		switch trimmed := strings.TrimSpace(tc.Function.Arguments); {
+		case trimmed == "":
+			arguments = json.RawMessage("{}")
+		case json.Valid([]byte(trimmed)):
+			arguments = json.RawMessage(trimmed)
+		default:
+			// Not JSON: keep the text, encoded as a JSON string.
+			arguments = tc.Function.Arguments
+		}
+		callJSON, err := json.Marshal(map[string]interface{}{
+			"name":      tc.Function.Name,
+			"arguments": arguments,
+		})
+		if err != nil {
+			// Not expected once arguments is validated; never emit an empty block.
+			continue
+		}
+		parts = append(parts, fmt.Sprintf("<tool_call>%s</tool_call>", string(callJSON)))
+	}
+	return strings.Join(parts, "\n")
+}
+// ConvertToolResultToText converts a tool-role message into text form: content wrapped in a <tool_result> tag whose call_id attribute is toolCallID.
+// It is used in prompt-injection mode to pass tool execution results; neither argument is escaped.
+func ConvertToolResultToText(toolCallID string, content string) string {
+	return fmt.Sprintf("<tool_result call_id=\"%s\">%s</tool_result>", toolCallID, content)
+}

internal/tools/prompt_test.go ADDED Viewed

	@@ -0,0 +1,132 @@

+package tools
+import (
+	"strings"
+	"testing"
+	"zai-proxy/internal/model"
+)
+func TestBuildToolSystemPrompt_Basic(t *testing.T) { // the prompt must mention the tool name, description, format tag and parameters
+	tools := []model.Tool{
+		{
+			Type: "function",
+			Function: model.ToolFunction{
+				Name:        "get_weather",
+				Description: "Get current weather",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"city": map[string]interface{}{
+							"type":        "string",
+							"description": "City name",
+						},
+					},
+					"required": []string{"city"},
+				},
+			},
+		},
+	}
+	result := BuildToolSystemPrompt(tools, nil)
+	if !strings.Contains(result, "get_weather") {
+		t.Error("should contain tool name")
+	}
+	if !strings.Contains(result, "Get current weather") {
+		t.Error("should contain description")
+	}
+	if !strings.Contains(result, "<tool_call>") {
+		t.Error("should contain format instruction")
+	}
+	if !strings.Contains(result, "city") {
+		t.Error("should contain parameter info")
+	}
+}
+func TestBuildToolSystemPrompt_Empty(t *testing.T) { // nil tools must yield an empty prompt
+	result := BuildToolSystemPrompt(nil, nil)
+	if result != "" {
+		t.Error("should return empty for nil tools")
+	}
+}
+func TestBuildToolSystemPrompt_ToolChoiceNone(t *testing.T) { // tool_choice "none" adds the instruction forbidding tool calls
+	tools := []model.Tool{{
+		Type:     "function",
+		Function: model.ToolFunction{Name: "test"},
+	}}
+	result := BuildToolSystemPrompt(tools, "none")
+	if !strings.Contains(result, "禁止调用任何工具") {
+		t.Error("should instruct not to call tools")
+	}
+}
+func TestBuildToolSystemPrompt_ToolChoiceRequired(t *testing.T) { // tool_choice "required" adds the instruction demanding at least one call
+	tools := []model.Tool{{
+		Type:     "function",
+		Function: model.ToolFunction{Name: "test"},
+	}}
+	result := BuildToolSystemPrompt(tools, "required")
+	if !strings.Contains(result, "必须包含至少一个") {
+		t.Error("should instruct to call at least one tool")
+	}
+}
+func TestBuildToolSystemPrompt_ToolChoiceSpecific(t *testing.T) { // a function-object tool_choice names the tool that must be called
+	tools := []model.Tool{{
+		Type:     "function",
+		Function: model.ToolFunction{Name: "get_weather"},
+	}}
+	choice := map[string]interface{}{
+		"type": "function",
+		"function": map[string]interface{}{
+			"name": "get_weather",
+		},
+	}
+	result := BuildToolSystemPrompt(tools, choice)
+	if !strings.Contains(result, `必须调用工具 "get_weather"`) {
+		t.Error("should instruct to call specific tool")
+	}
+}
+func TestConvertToolCallToText(t *testing.T) { // the rendered text must contain the tag, the function name and the arguments
+	toolCalls := []model.ToolCall{
+		{
+			ID:   "call_123",
+			Type: "function",
+			Function: model.FunctionCall{
+				Name:      "get_weather",
+				Arguments: `{"city":"Beijing"}`,
+			},
+		},
+	}
+	result := ConvertToolCallToText(toolCalls)
+	if !strings.Contains(result, "<tool_call>") {
+		t.Error("should contain <tool_call> tag")
+	}
+	if !strings.Contains(result, "get_weather") {
+		t.Error("should contain function name")
+	}
+	if !strings.Contains(result, "Beijing") {
+		t.Error("should contain arguments")
+	}
+}
+func TestConvertToolResultToText(t *testing.T) { // the rendered text must contain the call ID, the result content and the tag
+	result := ConvertToolResultToText("call_123", `{"temp": 25}`)
+	if !strings.Contains(result, "call_123") {
+		t.Error("should contain call ID")
+	}
+	if !strings.Contains(result, `{"temp": 25}`) {
+		t.Error("should contain result content")
+	}
+	if !strings.Contains(result, "<tool_result") {
+		t.Error("should contain <tool_result> tag")
+	}
+}

internal/upstream/client.go CHANGED Viewed

@@ -94,11 +94,62 @@ func MakeUpstreamRequest(token string, messages []model.Message, modelName strin
 		}
 	}
 	var upstreamMessages []map[string]interface{}
 	for _, msg := range messages {
 		upstreamMessages = append(upstreamMessages, msg.ToUpstreamMessage(urlToFileID))
 	}
 	body := map[string]interface{}{
 		"stream":           true,
 		"model":            targetModel,
@@ -120,26 +171,6 @@ func MakeUpstreamRequest(token string, messages []model.Message, modelName strin
 		body["mcp_servers"] = mcpServers
 	}
-	// 当使用 -tools 模型时，自动注入内置工具（客户端自带工具优先）
-	if model.IsToolsModel(modelName) {
-		clientToolNames := make(map[string]bool)
-		for _, t := range tools {
-			clientToolNames[t.Function.Name] = true
-		}
-		for _, bt := range builtintools.GetBuiltinTools() {
-			if !clientToolNames[bt.Function.Name] {
-				tools = append(tools, bt)
-			}
-		}
-	}
-	if len(tools) > 0 {
-		body["tools"] = tools
-		if toolChoice != nil {
-			body["tool_choice"] = toolChoice
-		}
-	}
 	if len(filesData) > 0 {
 		body["files"] = filesData
 		body["current_user_message_id"] = userMsgID

 		}
 	}
+	// 当使用 -tools 模型时，自动注入内置工具（客户端自带工具优先）
+	if model.IsToolsModel(modelName) {
+		clientToolNames := make(map[string]bool)
+		for _, t := range tools {
+			clientToolNames[t.Function.Name] = true
+		}
+		for _, bt := range builtintools.GetBuiltinTools() {
+			if !clientToolNames[bt.Function.Name] {
+				tools = append(tools, bt)
+			}
+		}
+	}
 	var upstreamMessages []map[string]interface{}
+	hasPromptTools := len(tools) > 0
 	for _, msg := range messages {
+		if hasPromptTools {
+			// prompt 注入模式：将 tool_calls / tool 结果转为纯文本
+			if msg.Role == "assistant" && len(msg.ToolCalls) > 0 {
+				text, _ := msg.ParseContent()
+				callText := builtintools.ConvertToolCallToText(msg.ToolCalls)
+				if text != "" {
+					text = text + "\n" + callText
+				} else {
+					text = callText
+				}
+				upstreamMessages = append(upstreamMessages, map[string]interface{}{
+					"role":    "assistant",
+					"content": text,
+				})
+				continue
+			}
+			if msg.Role == "tool" {
+				text, _ := msg.ParseContent()
+				upstreamMessages = append(upstreamMessages, map[string]interface{}{
+					"role":    "user",
+					"content": builtintools.ConvertToolResultToText(msg.ToolCallID, text),
+				})
+				continue
+			}
+		}
 		upstreamMessages = append(upstreamMessages, msg.ToUpstreamMessage(urlToFileID))
 	}
+	// 工具注入：通过 system prompt 注入工具定义（z.ai 不支持原生 tools 字段）
+	if len(tools) > 0 {
+		toolSystemPrompt := builtintools.BuildToolSystemPrompt(tools, toolChoice)
+		if toolSystemPrompt != "" {
+			systemMsg := map[string]interface{}{
+				"role":    "system",
+				"content": toolSystemPrompt,
+			}
+			upstreamMessages = append([]map[string]interface{}{systemMsg}, upstreamMessages...)
+		}
+	}
 	body := map[string]interface{}{
 		"stream":           true,
 		"model":            targetModel,
 		body["mcp_servers"] = mcpServers
 	}
 	if len(filesData) > 0 {
 		body["files"] = filesData
 		body["current_user_message_id"] = userMsgID