Spaces:
Running
Running
Amlan-109
feat: Initial commit of LocalAI Amlan Edition with premium branding and personalization
750bbe6
| package reasoning | |
| import ( | |
| "strings" | |
| ) | |
| // DetectThinkingStartToken checks if the prompt or template contains a thinking start token | |
| // and returns the detected token. This indicates that the model's prompt template | |
| // already includes the thinking token, so the model output will start with reasoning | |
| // content without an explicit opening tag. | |
| // Returns the detected token if found, empty string otherwise. | |
| // Common tokens checked (in order of specificity - longer first): | |
| // Based on llama.cpp's chat-parser.cpp implementations: | |
| // - <|START_THINKING|> (Command-R models) | |
| // - <|inner_prefix|> (Apertus models) | |
| // - <seed:think> (Seed models) | |
| // - <think> (DeepSeek, Granite, ExaOne models) | |
| // - <|think|> (Solar Open models) | |
| // - <thinking> (General thinking tag) | |
| // - [THINK] (Magistral models) | |
| // Custom tokens from config are checked first, then default tokens. | |
| func DetectThinkingStartToken(prompt string, config *Config) string { | |
| // Common thinking start tokens (in order of specificity - longer first) | |
| // Based on llama.cpp's chat-parser.cpp implementations | |
| defaultTokens := []string{ | |
| "<|START_THINKING|>", // Command-R models | |
| "<|inner_prefix|>", // Apertus models | |
| "<seed:think>", // Seed models | |
| "<think>", // DeepSeek, Granite, ExaOne models | |
| "<|think|>", // Solar Open models | |
| "<thinking>", // General thinking tag | |
| "[THINK]", // Magistral models | |
| } | |
| // Merge custom tokens with default tokens (custom tokens first for priority) | |
| var thinkingStartTokens []string | |
| if config != nil && len(config.ThinkingStartTokens) > 0 { | |
| thinkingStartTokens = append(thinkingStartTokens, config.ThinkingStartTokens...) | |
| } | |
| thinkingStartTokens = append(thinkingStartTokens, defaultTokens...) | |
| // Check if prompt ends with any of these tokens (allowing for trailing whitespace/newlines) | |
| trimmedPrompt := strings.TrimRight(prompt, " \t\n\r") | |
| for _, token := range thinkingStartTokens { | |
| if strings.Contains(trimmedPrompt, token) { | |
| return token | |
| } | |
| } | |
| // Also check if any of these tokens appear near the end (within last 100 chars) | |
| // This handles cases where there might be stop tokens or other content after | |
| if len(trimmedPrompt) > 100 { | |
| lastPart := trimmedPrompt[len(trimmedPrompt)-100:] | |
| for _, token := range thinkingStartTokens { | |
| if idx := strings.LastIndex(lastPart, token); idx != -1 { | |
| // Check if this is the last meaningful content (only whitespace after) | |
| afterToken := lastPart[idx+len(token):] | |
| if strings.TrimSpace(afterToken) == "" { | |
| return token | |
| } | |
| } | |
| } | |
| } | |
| return "" | |
| } | |
| // ExtractReasoningWithConfig extracts reasoning from content with the given config. | |
| // If reasoning is disabled, it returns the original content. | |
| // If thinking start token prefill is enabled, it prepends the thinking start token to the content. | |
| // It returns the extracted reasoning and the cleaned content. | |
| func ExtractReasoningWithConfig(content, thinkingStartToken string, config Config) (reasoning string, cleanedContent string) { | |
| cleanedContent = content | |
| // If reasoning is not disabled, prepend the thinking start token if needed and extract reasoning | |
| if config.DisableReasoning == nil || !*config.DisableReasoning { | |
| // If thinking start token prefill is not disabled, prepend the thinking start token | |
| if config.DisableReasoningTagPrefill == nil || !*config.DisableReasoningTagPrefill { | |
| cleanedContent = PrependThinkingTokenIfNeeded(cleanedContent, thinkingStartToken) | |
| } | |
| // Extract reasoning from the cleaned content | |
| reasoning, cleanedContent = ExtractReasoning(cleanedContent, &config) | |
| if config.StripReasoningOnly != nil && *config.StripReasoningOnly { | |
| reasoning = "" | |
| } | |
| } | |
| return reasoning, cleanedContent | |
| } | |
// PrependThinkingTokenIfNeeded prepends the thinking start token to content if
// it was detected in the prompt. This allows the standard extraction logic to
// work correctly for models where the thinking token is already in the prompt.
func PrependThinkingTokenIfNeeded(content string, startToken string) string {
	if startToken == "" {
		return content
	}

	// Drop leading ASCII whitespace before looking for the token.
	stripped := strings.TrimLeft(content, " \t\n\r")

	// The token already appears in the content: nothing to prepend.
	if strings.Contains(stripped, startToken) {
		return content
	}

	// Insert the token right after any leading whitespace so the result looks
	// like normally tagged model output.
	leadingWhitespace := content[:len(content)-len(stripped)]
	return leadingWhitespace + startToken + stripped
}
| // ExtractReasoning extracts reasoning content from thinking tags and returns | |
| // both the extracted reasoning and the cleaned content (with tags removed). | |
| // It handles <thinking>...</thinking> and <think>...</think> tags. | |
| // Multiple reasoning blocks are concatenated with newlines. | |
| // Custom tag pairs from config are checked first, then default tag pairs. | |
| func ExtractReasoning(content string, config *Config) (reasoning string, cleanedContent string) { | |
| if content == "" { | |
| return "", content | |
| } | |
| var reasoningParts []string | |
| var cleanedParts []string | |
| remaining := content | |
| // Define default tag pairs to look for (matching llama.cpp's chat-parser.cpp) | |
| defaultTagPairs := []struct { | |
| start string | |
| end string | |
| }{ | |
| {"<|START_THINKING|>", "<|END_THINKING|>"}, // Command-R models | |
| {"<|inner_prefix|>", "<|inner_suffix|>"}, // Apertus models | |
| {"<seed:think>", "</seed:think>"}, // Seed models | |
| {"<think>", "</think>"}, // DeepSeek, Granite, ExaOne models | |
| {"<|think|>", "<|end|><|begin|>assistant<|content|>"}, // Solar Open models (complex end) | |
| {"<thinking>", "</thinking>"}, // General thinking tag | |
| {"[THINK]", "[/THINK]"}, // Magistral models | |
| } | |
| // Merge custom tag pairs with default tag pairs (custom pairs first for priority) | |
| var tagPairs []struct { | |
| start string | |
| end string | |
| } | |
| if config != nil && len(config.TagPairs) > 0 { | |
| for _, pair := range config.TagPairs { | |
| if pair.Start != "" && pair.End != "" { | |
| tagPairs = append(tagPairs, struct { | |
| start string | |
| end string | |
| }{pair.Start, pair.End}) | |
| } | |
| } | |
| } | |
| // Add default tag pairs | |
| for _, pair := range defaultTagPairs { | |
| tagPairs = append(tagPairs, pair) | |
| } | |
| // Track the last position we've processed | |
| lastPos := 0 | |
| for { | |
| // Find the earliest tag start | |
| earliestStart := -1 | |
| earliestEnd := -1 | |
| isUnclosed := false | |
| var matchedTag struct { | |
| start string | |
| end string | |
| } | |
| for _, tagPair := range tagPairs { | |
| startIdx := strings.Index(remaining[lastPos:], tagPair.start) | |
| if startIdx == -1 { | |
| continue | |
| } | |
| startIdx += lastPos | |
| // Find the corresponding end tag | |
| endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end) | |
| if endIdx == -1 { | |
| // Unclosed tag - extract what we have | |
| if earliestStart == -1 || startIdx < earliestStart { | |
| earliestStart = startIdx | |
| earliestEnd = len(remaining) | |
| isUnclosed = true | |
| matchedTag = tagPair | |
| } | |
| continue | |
| } | |
| endIdx += startIdx + len(tagPair.start) | |
| // Found a complete tag pair | |
| if earliestStart == -1 || startIdx < earliestStart { | |
| earliestStart = startIdx | |
| earliestEnd = endIdx + len(tagPair.end) | |
| isUnclosed = false | |
| matchedTag = tagPair | |
| } | |
| } | |
| if earliestStart == -1 { | |
| // No more tags found, add remaining content | |
| if lastPos < len(remaining) { | |
| cleanedParts = append(cleanedParts, remaining[lastPos:]) | |
| } | |
| break | |
| } | |
| // Add content before the tag | |
| if earliestStart > lastPos { | |
| cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart]) | |
| } | |
| // Extract reasoning content | |
| reasoningStart := earliestStart + len(matchedTag.start) | |
| // For unclosed tags, earliestEnd is already at the end of the string | |
| // For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length | |
| var reasoningEnd int | |
| if isUnclosed { | |
| // Unclosed tag - extract everything to the end | |
| reasoningEnd = len(remaining) | |
| } else { | |
| // Closed tag - exclude the end tag | |
| reasoningEnd = earliestEnd - len(matchedTag.end) | |
| } | |
| if reasoningEnd > reasoningStart { | |
| reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd]) | |
| if reasoningContent != "" { | |
| reasoningParts = append(reasoningParts, reasoningContent) | |
| } | |
| } | |
| // Move past this tag | |
| lastPos = earliestEnd | |
| } | |
| // Combine reasoning parts | |
| reasoning = strings.Join(reasoningParts, "\n\n") | |
| // Combine cleaned content parts | |
| cleanedContent = strings.Join(cleanedParts, "") | |
| return reasoning, cleanedContent | |
| } | |