alessio-vertemati committed on
Commit
850aa4f
·
1 Parent(s): aa2ac45
Files changed (3) hide show
  1. README.md +39 -1
  2. pyproject.toml +1 -1
  3. uv.lock +1 -1
README.md CHANGED
@@ -10,4 +10,42 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # Token Counter
14
+
15
+ A Gradio-based web application for counting tokens in text and web content using OpenAI's tiktoken library.
16
+
17
+ ## Features
18
+
19
+ ### Text Input Mode
20
+ - Paste or type text directly into the interface
21
+ - Real-time token counting as you type
22
+ - Support for multiple OpenAI model encodings (GPT-4.1, GPT-5, O1, O3, O4-mini, embeddings, and more)
23
+ - Displays token count and character count
24
+
25
+ ### URL Input Mode
26
+ - Fetch and analyze content from any URL
27
+ - Counts tokens for both HTML and Markdown representations
28
+ - Shows token counts and character counts for both formats
29
+ - One-click example URL for testing
30
+ - **15-minute response caching** to prevent flooding target URLs with repeated requests
31
+
32
+ ## Supported Models
33
+
34
+ The tool supports token counting for various OpenAI model families:
35
+ - **Reasoning models**: o1, o3, o4-mini
36
+ - **Chat models**: gpt-5, gpt-4.1, gpt-4o, gpt-4, gpt-3.5-turbo
37
+ - **Embedding models**: text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large
38
+ - **Legacy models**: davinci-002, babbage-002
39
+
40
+ ## How It Works
41
+
42
+ Token counting uses the [tiktoken](https://github.com/openai/tiktoken) library to estimate the number of tokens that would be consumed by different OpenAI models. This is useful for:
43
+
44
+ - Estimating API costs
45
+ - Staying within model token limits
46
+ - Optimizing prompts and content
47
+ - Comparing token efficiency between HTML and Markdown formats
48
+
49
+ ## Caching
50
+
51
+ URL responses are cached for 15 minutes to reduce load on target servers. The status message indicates when cached content is being used and how old the cache is.
pyproject.toml CHANGED
@@ -5,7 +5,7 @@ description = "Text token counter"
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
- "gradio[mcp]>=6.0.0",
9
  "requests>=2.28",
10
  "python-dotenv>=1.2.1",
11
  "tiktoken>=0.12.0",
 
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
+ "gradio[mcp]>=6.2.0",
9
  "requests>=2.28",
10
  "python-dotenv>=1.2.1",
11
  "tiktoken>=0.12.0",
uv.lock CHANGED
@@ -1521,7 +1521,7 @@ dependencies = [
1521
 
1522
  [package.metadata]
1523
  requires-dist = [
1524
- { name = "gradio", extras = ["mcp"], specifier = ">=6.0.0" },
1525
  { name = "python-dotenv", specifier = ">=1.2.1" },
1526
  { name = "requests", specifier = ">=2.28" },
1527
  { name = "tiktoken", specifier = ">=0.12.0" },
 
1521
 
1522
  [package.metadata]
1523
  requires-dist = [
1524
+ { name = "gradio", extras = ["mcp"], specifier = ">=6.2.0" },
1525
  { name = "python-dotenv", specifier = ">=1.2.1" },
1526
  { name = "requests", specifier = ">=2.28" },
1527
  { name = "tiktoken", specifier = ">=0.12.0" },