Spaces:
Sleeping
Sleeping
| """ | |
| Rate limiting and caching utilities. | |
| """ | |
| import os | |
| import time | |
| import hashlib | |
| from datetime import datetime | |
| from typing import Optional, Any | |
| from dataclasses import dataclass, field | |
| from collections import defaultdict | |
| import diskcache | |
# Quota ceilings taken from the GPT-OSS API limits, keyed by a short code:
# r/t = requests/tokens, pm/pd = per minute/per day.
RATE_LIMITS = {
    "rpm": 30,  # requests allowed per minute
    "rpd": 1_000,  # requests allowed per day
    "tpm": 8_000,  # tokens allowed per minute
    "tpd": 200_000,  # tokens allowed per day
}

# Number of Wolfram Alpha requests allowed per calendar month.
WOLFRAM_MONTHLY_LIMIT = 2_000
# The decorator is required: without it the ``field(...)`` defaults below stay
# as dataclasses.Field objects and the window arithmetic raises TypeError.
@dataclass
class RateLimitTracker:
    """Track request and token usage over a one-minute and a one-day window.

    Both windows start when the tracker is created. A window restarts (and
    its counters return to zero) the first time it is found to be at least
    60 seconds / 86400 seconds old.
    """

    requests_this_minute: int = 0
    requests_today: int = 0
    tokens_this_minute: int = 0
    tokens_today: int = 0
    # Window start times as time.time() values, stamped per instance.
    minute_start: float = field(default_factory=time.time)
    day_start: float = field(default_factory=time.time)

    def reset_if_needed(self) -> None:
        """Zero the counters of any window that has fully elapsed."""
        now = time.time()

        # Minute window: 60 seconds.
        if now - self.minute_start >= 60:
            self.requests_this_minute = 0
            self.tokens_this_minute = 0
            self.minute_start = now

        # Daily window: 86400 seconds.
        if now - self.day_start >= 86400:
            self.requests_today = 0
            self.tokens_today = 0
            self.day_start = now

    def _seconds_until_minute_reset(self) -> int:
        """Return the whole seconds left in the current minute window."""
        return int(60 - (time.time() - self.minute_start))

    def can_make_request(self, estimated_tokens: int = 1000) -> tuple[bool, str]:
        """Check whether one more request fits within every limit.

        Args:
            estimated_tokens: Tokens the pending request is expected to use.

        Returns:
            ``(True, "")`` when the request is allowed, otherwise
            ``(False, message)`` where ``message`` names the limit that was
            hit. Limits are checked in the order: requests per minute,
            requests per day, tokens per minute, tokens per day.
        """
        self.reset_if_needed()

        if self.requests_this_minute >= RATE_LIMITS["rpm"]:
            wait_time = self._seconds_until_minute_reset()
            return False, f"Rate limit exceeded. Please wait {wait_time} seconds."

        if self.requests_today >= RATE_LIMITS["rpd"]:
            return False, "Daily request limit reached. Please try again tomorrow."

        if self.tokens_this_minute + estimated_tokens > RATE_LIMITS["tpm"]:
            wait_time = self._seconds_until_minute_reset()
            return False, f"Token limit exceeded. Please wait {wait_time} seconds."

        if self.tokens_today + estimated_tokens > RATE_LIMITS["tpd"]:
            return False, "Daily token limit reached. Please try again tomorrow."

        return True, ""

    def record_usage(self, tokens_used: int) -> None:
        """Count one completed request and the tokens it consumed.

        Args:
            tokens_used: Tokens consumed by the request.
        """
        # Roll the windows first so usage reported after a window has elapsed
        # is booked into the new window instead of being added to stale
        # counters that the next check would wipe.
        self.reset_if_needed()

        self.requests_this_minute += 1
        self.requests_today += 1
        self.tokens_this_minute += tokens_used
        self.tokens_today += tokens_used
| class SessionRateLimiter: | |
| """Manage rate limits across sessions.""" | |
| def __init__(self): | |
| self._trackers: dict[str, RateLimitTracker] = defaultdict(RateLimitTracker) | |
| def get_tracker(self, session_id: str) -> RateLimitTracker: | |
| return self._trackers[session_id] | |
| def check_limit(self, session_id: str, estimated_tokens: int = 1000) -> tuple[bool, str]: | |
| return self._trackers[session_id].can_make_request(estimated_tokens) | |
| def record(self, session_id: str, tokens: int): | |
| self._trackers[session_id].record_usage(tokens) | |
# Module-level SessionRateLimiter instance, created once at import time.
rate_limiter: SessionRateLimiter = SessionRateLimiter()
class WolframRateLimiter:
    """Count Wolfram Alpha API calls against a monthly request limit.

    The count is kept in a diskcache.Cache under a key derived from the
    current year and month, so it is stored on disk rather than in memory.
    """

    def __init__(self, cache_dir: str = ".wolfram_cache", monthly_limit: Optional[int] = None):
        """Open the usage cache.

        Args:
            cache_dir: Directory handed to ``diskcache.Cache``.
            monthly_limit: Requests allowed per month; ``None`` selects
                ``WOLFRAM_MONTHLY_LIMIT``.
        """
        self.cache = diskcache.Cache(cache_dir)
        self.monthly_limit = WOLFRAM_MONTHLY_LIMIT if monthly_limit is None else monthly_limit

    def _get_month_key(self) -> str:
        """Return the cache key for the current month (local clock)."""
        now = datetime.now()
        return f"wolfram_usage_{now.year}_{now.month}"

    def get_usage(self) -> int:
        """Return the number of calls recorded so far this month."""
        return self.cache.get(self._get_month_key(), 0)

    def can_make_request(self) -> tuple[bool, str, int]:
        """Check whether the Wolfram API may be called.

        Returns:
            ``(can_proceed, message, remaining_requests)``. ``message`` is an
            error when the limit is reached, a warning when 100 or fewer
            requests remain, and an empty string otherwise.
        """
        usage = self.get_usage()
        remaining = self.monthly_limit - usage

        if usage >= self.monthly_limit:
            # Report the configured limit instead of a hard-coded number so
            # the message cannot drift from the value actually enforced.
            return (
                False,
                f"Wolfram Alpha monthly limit ({self.monthly_limit} requests) reached. Using fallback.",
                0,
            )

        # Warn when close to the limit.
        if remaining <= 100:
            return True, f"Warning: Only {remaining} Wolfram requests remaining this month.", remaining

        return True, "", remaining

    def record_usage(self) -> None:
        """Record one API call against the current month."""
        key = self._get_month_key()
        current = self.cache.get(key, 0)
        # A 32-day expiry on every write lets counters for past months drop
        # out of the cache without explicit cleanup.
        self.cache.set(key, current + 1, expire=86400 * 32)

    def get_status(self) -> dict:
        """Return ``used``, ``limit``, ``remaining`` and ``month`` (YYYY-MM)."""
        usage = self.get_usage()
        return {
            "used": usage,
            "limit": self.monthly_limit,
            "remaining": max(0, self.monthly_limit - usage),
            "month": datetime.now().strftime("%Y-%m"),
        }
# Module-level WolframRateLimiter instance, created at import time with the
# constructor's default arguments.
wolfram_limiter: WolframRateLimiter = WolframRateLimiter()
class QueryCache:
    """Cache responses to repeated queries to reduce API calls."""

    def __init__(self, cache_dir: str = ".cache"):
        """Open the cache stored in ``cache_dir``."""
        self.cache = diskcache.Cache(cache_dir)
        self.ttl = 3600 * 24 * 7  # entries expire after 7 days

    def _make_key(self, query: str, context: str = "") -> str:
        """Return a SHA-256 hex digest identifying ``(query, context)``.

        The query is length-prefixed before it is joined with the context.
        A bare ``query:context`` join is ambiguous: ``("a:b", "")`` and
        ``("a", "b:")`` would both hash ``"a:b:"`` and share one cache entry.

        NOTE: entries written under the earlier un-prefixed key layout are no
        longer found; they simply expire through their TTL.
        """
        content = f"{len(query)}:{query}:{context}"
        return hashlib.sha256(content.encode()).hexdigest()

    def get(self, query: str, context: str = "") -> Optional[str]:
        """Return the cached response, or ``None`` when nothing is stored."""
        return self.cache.get(self._make_key(query, context))

    def set(self, query: str, response: str, context: str = "") -> None:
        """Store ``response`` for ``(query, context)`` with the cache TTL."""
        self.cache.set(self._make_key(query, context), response, expire=self.ttl)

    def clear(self) -> None:
        """Remove every cached response."""
        self.cache.clear()
# Module-level QueryCache instance, created at import time with the
# constructor's default arguments.
query_cache: QueryCache = QueryCache()