File size: 783 Bytes
80ffd2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package tokenizer

import (
	"log"

	"github.com/pkoukk/tiktoken-go"
)

// encoding is the package-wide tiktoken encoder that Init assigns and
// CountTokens reads. It is nil until Init has been called; CountTokens treats
// a nil value as the signal to use its length-based estimate instead.
// NOTE(review): no synchronization is visible in this file, so Init is
// presumably expected to finish before CountTokens is called concurrently —
// confirm against callers.
var encoding *tiktoken.Tiktoken

// Init loads the cl100k_base encoding from tiktoken and stores it in the
// package-level encoding variable used by CountTokens.
//
// Call it at startup so the encoding data is preloaded before tokens are
// counted. When loading fails, a warning is logged and the error is returned
// to the caller; on success an informational message is logged and nil is
// returned.
func Init() error {
	loaded, err := tiktoken.GetEncoding("cl100k_base")

	// Store the result unconditionally: a failed call overwrites any earlier
	// value with whatever GetEncoding returned alongside the error
	// (presumably nil — confirm against the library).
	encoding = loaded

	if err == nil {
		log.Printf("[INFO] Tiktoken initialized with cl100k_base encoding")
		return nil
	}

	log.Printf("[WARN] Failed to initialize tiktoken: %v, using fallback", err)
	return err
}

// CountTokens returns the number of tokens in text.
//
// When the package-level encoding is set, the text is encoded with it and the
// length of the resulting token slice is returned. When it is nil, the count
// is estimated as len(text) / 4: one token per four bytes of the string (bytes,
// not runes), rounded down, so a non-empty text shorter than four bytes is
// estimated at 0.
func CountTokens(text string) int {
	if encoding != nil {
		tokens := encoding.Encode(text, nil, nil)
		return len(tokens)
	}

	// No encoder available: fall back to the rough four-bytes-per-token estimate.
	const bytesPerToken = 4
	return len(text) / bytesPerToken
}