Spaces:

oneofftech
/

token-counter

Sleeping

App Files Files Community

alessio-vertemati committed on Dec 30, 2025

Commit

dfd9c92

1 Parent(s): 0a85b87

Add basic token counter

Browse files

Files changed (5) hide show

.gitignore +13 -0
.python-version +1 -0
app.py +222 -0
pyproject.toml +12 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,13 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv
+.env

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

app.py ADDED Viewed

	@@ -0,0 +1,222 @@

+import gradio as gr
+import os
+import asyncio
+import json
+import tiktoken
+import requests
+from typing import List, Tuple, Optional
+from dataclasses import dataclass
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
def count_tokens(text: str, model: str) -> Tuple[int, str]:
    """Count the tokens in ``text`` using the tiktoken encoding for ``model``.

    The function never raises: every failure is reported through the status
    message so it can be shown directly in the UI.

    Args:
        text: The input text to tokenize. Empty or ``None`` yields a count of 0.
        model: The model name used to look up the tiktoken encoding.

    Returns:
        Tuple of (token_count, status_message). ``token_count`` is 0 whenever
        the text could not be tokenized.
    """
    if not text:
        return 0, "No text provided"
    if not model:
        # Without a model there is no encoding to look up; report it plainly
        # instead of surfacing tiktoken's lookup error.
        return 0, "No model selected"
    try:
        encoding = tiktoken.encoding_for_model(model)
        # By default tiktoken raises ValueError when the text contains the
        # literal spelling of a special token (e.g. "<|endoftext|>").  For a
        # counter fed arbitrary user text, treat such strings as ordinary text.
        tokens = encoding.encode(text, disallowed_special=())
        return len(tokens), f"✓ Counted {len(tokens)} tokens using {model} encoding"
    except KeyError:
        # encoding_for_model raises KeyError for names it cannot map; str() of
        # a KeyError is a quoted repr, so build a readable message instead.
        return 0, f"Error: unsupported model '{model}'"
    except Exception as e:
        return 0, f"Error: {str(e)}"
def count_tokens_from_url(url: str, model: str) -> Tuple[int, int, str]:
    """Fetch ``url`` twice (as HTML and as Markdown) and count tokens in each.

    The same URL is requested once with ``Accept: text/html`` and once with
    ``Accept: text/markdown``; both bodies are tokenized with the tiktoken
    encoding for ``model``.  The function never raises: failures are reported
    through the status message.

    Args:
        url: The URL to fetch. Empty, ``None`` or whitespace-only counts as
            "no URL".
        model: The model name used to look up the tiktoken encoding.

    Returns:
        Tuple of (html_token_count, markdown_token_count, status_message).
        Both counts are 0 on any failure.
    """
    url = url.strip() if url else ""
    if not url:
        return 0, 0, "No URL provided"
    if not model:
        return 0, 0, "No model selected"
    try:
        # Resolve the encoding first so an unsupported model fails before any
        # network traffic is generated.
        encoding = tiktoken.encoding_for_model(model)

        # NOTE(review): this fetches a user-supplied URL from the server
        # hosting the app; consider restricting targets (e.g. private address
        # ranges) if the app is exposed publicly.
        # Fetch as HTML
        html_response = requests.get(
            url,
            headers={"Accept": "text/html"},
            timeout=10,
        )
        html_response.raise_for_status()
        html_content = html_response.text

        # Fetch as Markdown
        markdown_response = requests.get(
            url,
            headers={"Accept": "text/markdown"},
            timeout=10,
        )
        markdown_response.raise_for_status()
        markdown_content = markdown_response.text

        # disallowed_special=() makes tiktoken treat literal special-token
        # strings found in the page as plain text instead of raising.
        html_tokens = len(encoding.encode(html_content, disallowed_special=()))
        markdown_tokens = len(encoding.encode(markdown_content, disallowed_special=()))

        status = f"✓ Fetched from {url}\n"
        status += f"HTML: {html_tokens} tokens ({len(html_content)} chars)\n"
        status += f"Markdown: {markdown_tokens} tokens ({len(markdown_content)} chars)"

        # Servers are free to ignore the Accept header; tell the user when the
        # "Markdown" figure was not actually computed on a Markdown response.
        markdown_type = markdown_response.headers.get("Content-Type", "")
        if "markdown" not in markdown_type.lower():
            status += (
                "\nNote: server did not return Markdown "
                f"(Content-Type: {markdown_type or 'unknown'})"
            )
        return html_tokens, markdown_tokens, status
    except requests.exceptions.RequestException as e:
        return 0, 0, f"Error fetching URL: {str(e)}"
    except KeyError:
        # encoding_for_model raises KeyError for names it cannot map.
        return 0, 0, f"Error: unsupported model '{model}'"
    except Exception as e:
        return 0, 0, f"Error: {str(e)}"
# Model names offered in both tabs.  The selected value is handed to the
# counting callbacks, which pass it to tiktoken.encoding_for_model.
_MODEL_CHOICES = [
    # reasoning
    "o1",
    "o3",
    "o4-mini",
    # chat
    "gpt-5",
    "gpt-4.1",
    "gpt-4o",
    "gpt-4",
    "gpt-3.5-turbo",
    "gpt-3.5",
    "gpt-35-turbo",
    "text-embedding-ada-002",
    "text-embedding-3-small",
    "text-embedding-3-large",
    "davinci-002",
    "babbage-002",
]
_DEFAULT_MODEL = "gpt-4.1"


def _model_dropdown():
    """Build the model selector shared by the text and URL tabs."""
    return gr.Dropdown(
        # Copy so each component owns its own list.
        choices=list(_MODEL_CHOICES),
        value=_DEFAULT_MODEL,
        label="Model",
    )


def main():
    """Create and launch the Gradio interface.

    The UI has two tabs:

    * "Text Input" - counts tokens of the typed text, both on button click
      and on every change of the text box.
    * "URL Input" - fetches a URL and reports HTML and Markdown token counts.
    """
    with gr.Blocks(title="Token counter") as demo:
        gr.Markdown("""
        # Token Counter
        Count tokens in your text supporting different model encodings. Uses `tiktoken` to estimate the token count.
        """)
        with gr.Tabs():
            with gr.Tab("Text Input"):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter your text here...",
                            lines=10,
                            max_lines=20,
                        )
                        model_dropdown = _model_dropdown()
                        count_btn = gr.Button("Count Tokens", variant="primary")
                    with gr.Column():
                        token_count = gr.Number(
                            label="Token Count",
                            value=0,
                            interactive=False,
                        )
                        status_msg = gr.Textbox(
                            label="Status",
                            interactive=False,
                        )
                # Connect the button to the counting function
                count_btn.click(
                    fn=count_tokens,
                    inputs=[text_input, model_dropdown],
                    outputs=[token_count, status_msg],
                )
                # Also count on text change for real-time feedback
                text_input.change(
                    fn=count_tokens,
                    inputs=[text_input, model_dropdown],
                    outputs=[token_count, status_msg],
                )
            with gr.Tab("URL Input"):
                with gr.Row():
                    with gr.Column():
                        url_input = gr.Textbox(
                            label="URL",
                            placeholder="https://oneofftech.xyz/blog/parxing-week-2025/?utm=token-counter",
                            lines=1,
                        )
                        url_model_dropdown = _model_dropdown()
                        url_count_btn = gr.Button("Count Tokens from URL", variant="primary")
                    with gr.Column():
                        html_token_count = gr.Number(
                            label="HTML Token Count",
                            value=0,
                            interactive=False,
                        )
                        markdown_token_count = gr.Number(
                            label="Markdown Token Count",
                            value=0,
                            interactive=False,
                        )
                        url_status_msg = gr.Textbox(
                            label="Status",
                            interactive=False,
                            lines=3,
                        )
                # Connect the URL button to the URL counting function
                url_count_btn.click(
                    fn=count_tokens_from_url,
                    inputs=[url_input, url_model_dropdown],
                    outputs=[html_token_count, markdown_token_count, url_status_msg],
                )
    # The theme is supplied at launch time rather than to gr.Blocks.
    demo.launch(theme=gr.themes.Soft())


# Only start the app when executed as a script, not when imported.
if __name__ == "__main__":
    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,12 @@

+[project]
+name = "token-counter"
+version = "0.1.0"
+description = "Text token counter"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "gradio[mcp]>=6.0.0",
+    "requests>=2.28",
+    "python-dotenv>=1.2.1",
+    "tiktoken>=0.12.0",
+]

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff