Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import asyncio | |
| import json | |
| import tiktoken | |
| import requests | |
| import time | |
| from typing import List, Tuple, Optional, Dict | |
| from dataclasses import dataclass | |
| from dotenv import load_dotenv | |
# Load environment variables from a local .env file (e.g. API keys/config).
load_dotenv()

# URL response cache: {url: {"html": str, "markdown": str, "timestamp": float}}
# NOTE(review): entries are refreshed on expiry but never evicted, so the
# cache grows with the number of distinct URLs queried — confirm acceptable.
_url_cache: Dict[str, Dict] = {}
CACHE_DURATION = 900  # 15 minutes in seconds
def count_tokens(text: str, model: str) -> Tuple[int, str]:
    """Count tokens in text using the encoding associated with the model.

    Args:
        text: The input text to tokenize.
        model: The model name used to select a tiktoken encoding.

    Returns:
        Tuple of (token_count, status_message). On failure the count is 0
        and the message describes the error.
    """
    if not text:
        return 0, "No text provided"
    try:
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # tiktoken raises KeyError for model names it does not map
            # (the UI offers names like "gpt-5" / "gpt-35-turbo" that older
            # tiktoken releases don't know). Fall back to o200k_base, the
            # encoding used by recent OpenAI models.
            encoding = tiktoken.get_encoding("o200k_base")
        token_count = len(encoding.encode(text))
        return token_count, f"✓ Counted {token_count} tokens using {model} encoding"
    except Exception as e:
        # Surface any tokenizer failure in the status box instead of crashing.
        return 0, f"Error: {str(e)}"
def _fetch(url: str, accept: str) -> str:
    """Fetch url with the given Accept header; raise on HTTP/network errors."""
    response = requests.get(url, headers={"Accept": accept}, timeout=10)
    response.raise_for_status()
    return response.text


def _get_url_content(url: str, current_time: float) -> Tuple[str, str, Optional[int]]:
    """Return (html, markdown, cache_age_seconds) for url.

    cache_age_seconds is None when the content was fetched fresh; otherwise
    it is the age of the cached entry in whole seconds.
    """
    cached = _url_cache.get(url)
    if cached is not None and current_time - cached["timestamp"] < CACHE_DURATION:
        return cached["html"], cached["markdown"], int(current_time - cached["timestamp"])

    # Cache miss or expired entry: fetch both representations fresh.
    html_content = _fetch(url, "text/html")
    markdown_content = _fetch(url, "text/markdown")
    _url_cache[url] = {
        "html": html_content,
        "markdown": markdown_content,
        "timestamp": current_time,
    }
    return html_content, markdown_content, None


def count_tokens_from_url(url: str, model: str) -> Tuple[int, int, str]:
    """Fetch content from URL and count tokens for both HTML and Markdown formats.

    Responses are cached per URL for CACHE_DURATION seconds to avoid
    re-fetching on repeated clicks.

    Args:
        url: The URL to fetch.
        model: The model name used to select a tiktoken encoding.

    Returns:
        Tuple of (html_token_count, markdown_token_count, status_message).
        Both counts are 0 on failure.
    """
    if not url:
        return 0, 0, "No URL provided"
    try:
        current_time = time.time()
        html_content, markdown_content, cache_age = _get_url_content(url, current_time)

        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Unknown model names (e.g. newer than this tiktoken release)
            # fall back to the o200k_base encoding used by recent models.
            encoding = tiktoken.get_encoding("o200k_base")
        html_tokens = len(encoding.encode(html_content))
        markdown_tokens = len(encoding.encode(markdown_content))

        # Single status-building path for both cache-hit and fresh fetches
        # (previously duplicated, which invited drift between the branches).
        if cache_age is not None:
            status = f"✓ Fetched from cache ({cache_age}s old)\n"
        else:
            status = f"✓ Fetched from {url}\n"
        status += f"HTML: {html_tokens} tokens ({len(html_content)} chars)\n"
        status += f"Markdown: {markdown_tokens} tokens ({len(markdown_content)} chars)"
        return html_tokens, markdown_tokens, status
    except requests.exceptions.RequestException as e:
        return 0, 0, f"Error fetching URL: {str(e)}"
    except Exception as e:
        return 0, 0, f"Error: {str(e)}"
def main():
    """Create and launch the Gradio interface."""
    # Single source of truth for the model selector; previously this list
    # was copy-pasted into both tabs.
    model_choices = [
        # reasoning
        "o1",
        "o3",
        "o4-mini",
        # chat
        "gpt-5",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4",
        "gpt-3.5-turbo",
        "gpt-3.5",
        "gpt-35-turbo",
        "text-embedding-ada-002",
        "text-embedding-3-small",
        "text-embedding-3-large",
        "davinci-002",
        "babbage-002",
    ]
    example_url = "https://oneofftech.xyz/blog/parxing-week-2025/?utm=token-counter"

    # NOTE: `theme` is a gr.Blocks constructor argument, not a launch()
    # argument — passing it to launch() does not apply the theme.
    with gr.Blocks(title="Token counter", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Token Counter
        Count tokens in your text supporting different model encodings. Uses `tiktoken` to estimate the token count.
        """)
        with gr.Tabs():
            with gr.Tab("Text Input"):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter your text here...",
                            lines=10,
                            max_lines=20,
                        )
                        model_dropdown = gr.Dropdown(
                            choices=model_choices,
                            value="gpt-4.1",
                            label="Model",
                        )
                        count_btn = gr.Button("Count Tokens", variant="primary")
                    with gr.Column():
                        token_count = gr.Number(
                            label="Token Count",
                            value=0,
                            interactive=False,
                        )
                        status_msg = gr.Textbox(
                            label="Status",
                            interactive=False,
                        )
                # Connect the button to the counting function
                count_btn.click(
                    fn=count_tokens,
                    inputs=[text_input, model_dropdown],
                    outputs=[token_count, status_msg],
                )
                # Also count on text change for real-time feedback
                text_input.change(
                    fn=count_tokens,
                    inputs=[text_input, model_dropdown],
                    outputs=[token_count, status_msg],
                )
            with gr.Tab("URL Input"):
                with gr.Row():
                    with gr.Column():
                        url_input = gr.Textbox(
                            label="URL",
                            placeholder="Enter URL here...",
                            lines=1,
                        )
                        gr.Markdown(f"**Example:** `{example_url}`")
                        use_example_btn = gr.Button("Use Example URL", size="sm")
                        url_model_dropdown = gr.Dropdown(
                            choices=model_choices,
                            value="gpt-4.1",
                            label="Model",
                        )
                        url_count_btn = gr.Button("Count Tokens from URL", variant="primary")
                    with gr.Column():
                        html_token_count = gr.Number(
                            label="HTML Token Count",
                            value=0,
                            interactive=False,
                        )
                        markdown_token_count = gr.Number(
                            label="Markdown Token Count",
                            value=0,
                            interactive=False,
                        )
                        url_status_msg = gr.Textbox(
                            label="Status",
                            interactive=False,
                            lines=3,
                        )
                # Connect the example button to fill the URL input
                use_example_btn.click(
                    fn=lambda: example_url,
                    inputs=[],
                    outputs=[url_input],
                )
                # Connect the URL button to the URL counting function
                url_count_btn.click(
                    fn=count_tokens_from_url,
                    inputs=[url_input, url_model_dropdown],
                    outputs=[html_token_count, markdown_token_count, url_status_msg],
                )
    demo.launch()


if __name__ == "__main__":
    main()