// Package tokenizer counts tokens in text using the cl100k_base tiktoken
// encoding, with a rough size-based estimate when the encoding is unavailable.
package tokenizer

import (
	"log"

	"github.com/pkoukk/tiktoken-go"
)

// encoding is the shared cl100k_base encoder. It is nil until Init succeeds;
// while it is nil, CountTokens uses its size-based fallback.
var encoding *tiktoken.Tiktoken

// Init initializes the tokenizer with the cl100k_base encoding.
//
// It should be called at startup to preload the encoding data. On failure it
// logs a warning, leaves the package encoder untouched (so CountTokens keeps
// using whatever it had before, or the fallback estimate), and returns the
// underlying error.
//
// NOTE(review): the package-level encoder is written here without
// synchronization, so Init is presumably meant to run before any concurrent
// CountTokens calls - confirm against callers.
func Init() error {
	// Load into a local first: the result is meaningless on error, and a failed
	// re-initialization must not clobber an encoder that is already working.
	enc, err := tiktoken.GetEncoding("cl100k_base")
	if err != nil {
		log.Printf("[WARN] Failed to initialize tiktoken: %v, using fallback", err)
		return err
	}

	encoding = enc
	log.Printf("[INFO] Tiktoken initialized with cl100k_base encoding")
	return nil
}

// CountTokens returns the number of tokens in text.
//
// When the encoder has been initialized, the count is the length of the
// cl100k_base encoding of text. Otherwise it is an estimate of one token per
// four bytes of text, with a minimum of one token for any non-empty text.
func CountTokens(text string) int {
	if encoding != nil {
		return len(encoding.Encode(text, nil, nil))
	}

	// Fallback: estimate ~4 bytes per token (len counts bytes, not runes).
	estimate := len(text) / 4
	if estimate == 0 && len(text) > 0 {
		// Integer division would report zero tokens for 1-3 byte inputs;
		// non-empty text is counted as at least one token.
		return 1
	}
	return estimate
}