Spaces:

oneofftech
/

token-counter

Sleeping

App Files Community

alessio-vertemati committed on Dec 30, 2025

Commit

58c1383

1 Parent(s): dfd9c92

Url cache

Browse files

Files changed (1) hide show

app.py +35 -1

app.py CHANGED Viewed

@@ -4,13 +4,18 @@ import asyncio
 import json
 import tiktoken
 import requests
-from typing import List, Tuple, Optional
 from dataclasses import dataclass
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 def count_tokens(text: str, model: str) -> Tuple[int, str]:
     """Count tokens in text using the specified model encoding.
@@ -45,6 +50,28 @@ def count_tokens_from_url(url: str, model: str) -> Tuple[int, int, str]:
         return 0, 0, "No URL provided"
     try:
         # Fetch as HTML
         html_response = requests.get(
             url,
@@ -63,6 +90,13 @@ def count_tokens_from_url(url: str, model: str) -> Tuple[int, int, str]:
         markdown_response.raise_for_status()
         markdown_content = markdown_response.text
         # Count tokens for both
         encoding = tiktoken.encoding_for_model(model)
         html_tokens = len(encoding.encode(html_content))

 import json
 import tiktoken
 import requests
+import time
+from typing import List, Tuple, Optional, Dict
 from dataclasses import dataclass
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
+# URL response cache: {url: {"html": str, "markdown": str, "timestamp": float}}
+_url_cache: Dict[str, Dict] = {}
+CACHE_DURATION = 900  # 15 minutes in seconds
 def count_tokens(text: str, model: str) -> Tuple[int, str]:
     """Count tokens in text using the specified model encoding.
         return 0, 0, "No URL provided"
     try:
+        # Check cache first
+        current_time = time.time()
+        if url in _url_cache:
+            cached_entry = _url_cache[url]
+            if current_time - cached_entry["timestamp"] < CACHE_DURATION:
+                # Use cached content
+                html_content = cached_entry["html"]
+                markdown_content = cached_entry["markdown"]
+                # Count tokens for both
+                encoding = tiktoken.encoding_for_model(model)
+                html_tokens = len(encoding.encode(html_content))
+                markdown_tokens = len(encoding.encode(markdown_content))
+                cache_age = int(current_time - cached_entry["timestamp"])
+                status = f"✓ Fetched from cache ({cache_age}s old)\n"
+                status += f"HTML: {html_tokens} tokens ({len(html_content)} chars)\n"
+                status += f"Markdown: {markdown_tokens} tokens ({len(markdown_content)} chars)"
+                return html_tokens, markdown_tokens, status
+        # Cache miss or expired - fetch fresh content
         # Fetch as HTML
         html_response = requests.get(
             url,
         markdown_response.raise_for_status()
         markdown_content = markdown_response.text
+        # Update cache
+        _url_cache[url] = {
+            "html": html_content,
+            "markdown": markdown_content,
+            "timestamp": current_time
+        }
         # Count tokens for both
         encoding = tiktoken.encoding_for_model(model)
         html_tokens = len(encoding.encode(html_content))