Spaces:

sharktide
/

lightning

Running

App Files Community

R.C.M. committed 20 days ago

Commit

99d08e9

1 Parent(s): 4a46cf8

Improved structure

Browse files

Files changed (6) hide show

app.py +174 -393
assets.py → helper/assets.py +0 -0
keywords.py → helper/keywords.py +0 -0
helper/misc.py +157 -0
helper/ratelimit.py +131 -0
subscriptions.py → helper/subscriptions.py +0 -0

app.py CHANGED Viewed

@@ -3,7 +3,12 @@ import time
 import hashlib
 from fastapi import FastAPI, Request, HTTPException, status, Header
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import Response, JSONResponse, StreamingResponse, RedirectResponse
 import httpx
 from bs4 import BeautifulSoup
 from typing import List, Dict, Any
@@ -11,12 +16,49 @@ import asyncio
 import re
 from random import randint
 from urllib.parse import quote
-import uuid
 import base64
-from subscriptions import fetch_subscription, normalize_plan_key, usage_locks, usage_store, USAGE_PERIODS, TIER_CONFIG, PLAN_ORDER
 from typing import Optional
-from keywords import *
-from assets import save_base64_image, cleanup_image, is_base64_image, asset_router
 app = FastAPI()
@@ -27,266 +69,16 @@ app.add_middleware(
     allow_headers=["*"],
 )
 app.include_router(asset_router)
-@app.get("/")
-async def reroute_to_status():
-    return RedirectResponse(url="https://inference.js.org", status_code=status.HTTP_308_PERMANENT_REDIRECT)
-OLLAMA_LIBRARY_URL = "https://ollama.com/library"
-IDENTITY_CACHE_TTL_SECONDS = 60
-identity_cache = {}
-CLIENT_BIND_TTL_SECONDS = int(os.getenv("CLIENT_BIND_TTL_SECONDS", str(8 * 24 * 60 * 60)))
-MAX_CLIENT_ID_LENGTH = 128
-client_subject_bindings = {}
-MAX_CHAT_PROMPT_CHARS = int(os.getenv("MAX_CHAT_PROMPT_CHARS", "120000"))
-MAX_CHAT_PROMPT_BYTES = int(os.getenv("MAX_CHAT_PROMPT_BYTES", "500000"))
-MAX_GROQ_PROMPT_CHARS = int(os.getenv("MAX_GROQ_PROMPT_CHARS", "90000"))
-MAX_GROQ_PROMPT_BYTES = int(os.getenv("MAX_GROQ_PROMPT_BYTES", "350000"))
-MAX_MEDIA_PROMPT_CHARS = int(os.getenv("MAX_MEDIA_PROMPT_CHARS", "4000"))
-MAX_MEDIA_PROMPT_BYTES = int(os.getenv("MAX_MEDIA_PROMPT_BYTES", "16000"))
-def extract_user_text(messages: list) -> str:
-    return " ".join(
-        message_content_to_text(m.get("content"))
-        for m in messages
-        if isinstance(m, dict) and m.get("role") == "user"
-    ).lower()
-def get_usage_period_key(metric: str) -> str:
-    now = time.gmtime()
-    period = USAGE_PERIODS.get(metric, "daily")
-    if period == "weekly":
-        iso_year, iso_week, _ = time.strftime("%G %V %u", now).split(" ")
-        return f"{iso_year}-W{iso_week}"
-    return time.strftime("%Y-%m-%d", now)
-def sanitize_client_id(raw_client_id: Optional[str]) -> Optional[str]:
-    if not isinstance(raw_client_id, str):
-        return None
-    trimmed = raw_client_id.strip()
-    if not trimmed or len(trimmed) > MAX_CLIENT_ID_LENGTH:
-        return None
-    if not re.match(r"^[A-Za-z0-9._:-]+$", trimmed):
-        return None
-    return trimmed
-def get_usage_lock(metric: str, subject: str) -> asyncio.Lock:
-    metric_locks = usage_locks.get(metric)
-    if metric_locks is None:
-        metric_locks = {}
-        usage_locks[metric] = metric_locks
-    lock = metric_locks.get(subject)
-    if lock is None:
-        lock = asyncio.Lock()
-        metric_locks[subject] = lock
-    return lock
-def build_default_subject(request: Request, client_id: Optional[str]) -> str:
-    if client_id:
-        client_hash = hashlib.sha256(client_id.encode("utf-8")).hexdigest()[:24]
-        return f"client:{client_hash}"
-    host = request.client.host if request.client else "unknown"
-    user_agent = request.headers.get("user-agent", "")
-    ua_hash = (
-        hashlib.sha256(user_agent.encode("utf-8")).hexdigest()[:12]
-        if user_agent
-        else "noua"
     )
-    return f"anon:{host}:{ua_hash}"
-def bind_client_subject(client_id: Optional[str], subject: str, plan_key: str):
-    if not client_id:
-        return
-    client_subject_bindings[client_id] = {
-        "subject": subject,
-        "plan_key": plan_key,
-        "expires_at": time.time() + CLIENT_BIND_TTL_SECONDS,
-    }
-def resolve_bound_subject(client_id: Optional[str], fallback_subject: str) -> str:
-    if not client_id:
-        return fallback_subject
-    bound = client_subject_bindings.get(client_id)
-    if not bound:
-        return fallback_subject
-    if bound.get("expires_at", 0) <= time.time():
-        client_subject_bindings.pop(client_id, None)
-        return fallback_subject
-    return bound.get("subject", fallback_subject)
-def normalize_prompt_value(prompt: Optional[str], field_name: str = "prompt") -> str:
-    if not isinstance(prompt, str):
-        raise HTTPException(status_code=400, detail=f"{field_name} is required")
-    normalized = prompt.strip()
-    if not normalized:
-        raise HTTPException(status_code=400, detail=f"{field_name} is required")
-    return normalized
-def enforce_prompt_size(prompt: str, max_chars: int, max_bytes: int, context: str):
-    char_len = len(prompt)
-    byte_len = len(prompt.encode("utf-8"))
-    if char_len > max_chars or byte_len > max_bytes:
-        raise HTTPException(
-            status_code=413,
-            detail=(
-                f"{context} is too large ({char_len} chars, {byte_len} bytes). "
-                f"Max allowed is {max_chars} chars or {max_bytes} bytes."
-            ),
-        )
-def message_content_to_text(content: Any) -> str:
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: List[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-                continue
-            if isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str):
-                    parts.append(text)
-        return " ".join(parts)
-    return ""
-def calculate_messages_size(messages: list) -> tuple[int, int]:
-    total_chars = 0
-    total_bytes = 0
-    for message in messages:
-        if not isinstance(message, dict):
-            continue
-        text = message_content_to_text(message.get("content"))
-        if not text:
-            continue
-        total_chars += len(text)
-        total_bytes += len(text.encode("utf-8"))
-    return total_chars, total_bytes
-def get_usage_snapshot_for_subject(plan_key: str, subject: str) -> Dict[str, Dict[str, Any]]:
-    plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
-    plan_limits = plan.get("limits", {})
-    snapshot: Dict[str, Dict[str, Any]] = {}
-    for metric in usage_store.keys():
-        limit = plan_limits.get(metric)
-        window_key = get_usage_period_key(metric)
-        entry = usage_store[metric].get(subject)
-        used = 0
-        if entry and entry.get("window") == window_key:
-            used = max(0, int(entry.get("count", 0)))
-        remaining = None if limit is None else max(0, int(limit) - used)
-        snapshot[metric] = {
-            "limit": limit,
-            "used": used,
-            "remaining": remaining,
-            "window": window_key,
-            "period": USAGE_PERIODS.get(metric, "daily"),
-        }
-    return snapshot
-async def resolve_rate_limit_identity(
-    request: Request,
-    authorization: Optional[str],
-    client_id: Optional[str] = None,
-) -> tuple[str, str]:
-    now = time.time()
-    normalized_client_id = sanitize_client_id(client_id)
-    default_subject = build_default_subject(request, normalized_client_id)
-    if not authorization or not authorization.startswith("Bearer "):
-        return "free", resolve_bound_subject(normalized_client_id, default_subject)
-    token = authorization.split(" ", 1)[1].strip()
-    if not token:
-        return "free", resolve_bound_subject(normalized_client_id, default_subject)
-    cached = identity_cache.get(token)
-    if cached and cached.get("expires_at", 0) > now:
-        plan_key = cached.get("plan_key", "free")
-        subject = cached.get("subject", default_subject)
-        bind_client_subject(normalized_client_id, subject, plan_key)
-        return plan_key, subject
-    try:
-        sub = await fetch_subscription(token)
-    except Exception:
-        return "free", resolve_bound_subject(normalized_client_id, default_subject)
-    if not isinstance(sub, dict) or sub.get("error"):
-        return "free", resolve_bound_subject(normalized_client_id, default_subject)
-    email = sub.get("email")
-    if isinstance(email, str) and email.strip():
-        subject = f"user:{email.strip().lower()}"
-    else:
-        subject = default_subject
-    plan_key = normalize_plan_key(sub.get("plan_key"))
-    identity_cache[token] = {
-        "plan_key": plan_key,
-        "subject": subject,
-        "expires_at": now + IDENTITY_CACHE_TTL_SECONDS,
-    }
-    bind_client_subject(normalized_client_id, subject, plan_key)
-    return plan_key, subject
-async def enforce_rate_limit(
-    request: Request,
-    authorization: Optional[str],
-    metric: str,
-    client_id: Optional[str] = None,
-) -> Dict[str, Optional[int | str]]:
-    if metric not in usage_store:
-        raise HTTPException(status_code=500, detail=f"Unknown limit metric: {metric}")
-    plan_key, subject = await resolve_rate_limit_identity(request, authorization, client_id)
-    plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
-    plan_limits = plan.get("limits", {})
-    limit = plan_limits.get(metric)
-    window_key = get_usage_period_key(metric)
-    lock = get_usage_lock(metric, subject)
-    async with lock:
-        bucket = usage_store[metric]
-        entry = bucket.get(subject)
-        if not entry or entry.get("window") != window_key:
-            entry = {"window": window_key, "count": 0}
-            bucket[subject] = entry
-        if limit is not None and entry["count"] >= int(limit):
-            raise HTTPException(
-                status_code=429,
-                detail=f"{metric} limit reached for {plan.get('name', 'current plan')}",
-            )
-        entry["count"] += 1
-        remaining = None if limit is None else max(0, int(limit) - entry["count"])
-        return {
-            "plan_key": plan_key,
-            "remaining": remaining,
-            "used": entry["count"],
-            "window": window_key,
-        }
-async def check_audio_rate_limit(
-    request: Request,
-    authorization: Optional[str],
-    client_id: Optional[str] = None,
-):
-    await enforce_rate_limit(request, authorization, "audioWeekly", client_id)
 def is_complex_reasoning(prompt: str) -> bool:
@@ -317,22 +109,7 @@ def is_cinematic_image_prompt(prompt: str) -> bool:
             return True
     return False
-async def check_image_rate_limit(
-    request: Request,
-    authorization: Optional[str],
-    client_id: Optional[str] = None,
-):
-    await enforce_rate_limit(request, authorization, "imagesDaily", client_id)
-async def check_video_rate_limit(
-    request: Request,
-    authorization: Optional[str],
-    client_id: Optional[str] = None,
-):
-    await enforce_rate_limit(request, authorization, "videosDaily", client_id)
-PKEY  = os.getenv("POLLINATIONS_KEY", "")
 PKEY2 = os.getenv("POLLINATIONS2_KEY", "")
 PKEY3 = os.getenv("POLLINATIONS3_KEY", "")
@@ -360,6 +137,7 @@ CEREBRAS_MODELS = [
     "zai-glm-4.7",
 ]
 async def check_chat_rate_limit(
     request: Request,
     authorization: Optional[str],
@@ -367,6 +145,7 @@ async def check_chat_rate_limit(
 ):
     return await enforce_rate_limit(request, authorization, "cloudChatDaily", client_id)
 @app.head("/status/sfx")
 async def head_sfx():
     return Response(
@@ -374,9 +153,10 @@ async def head_sfx():
         headers={
             "Content-Type": "audio/mpeg",
             "Accept-Ranges": "bytes",
-        }
     )
 @app.head("/status/image")
 async def head_image():
     return Response(
@@ -384,9 +164,10 @@ async def head_image():
         headers={
             "Content-Type": "image/jpeg",
             "Accept-Ranges": "bytes",
-        }
     )
 @app.head("/status/video")
 async def head_video():
     return Response(
@@ -394,9 +175,10 @@ async def head_video():
         headers={
             "Content-Type": "video/mp4",
             "Accept-Ranges": "bytes",
-        }
     )
 @app.head("/status/text")
 async def head_text():
     return Response(
@@ -407,35 +189,27 @@ async def head_text():
         },
     )
 @app.get("/status")
 async def get_status():
     notify = ""
     services = {
-        "Video Generation": {
-            "code": 200,
-            "state": "ok",
-            "message": "Running normally"
-        },
-        "Image Generation": {
-            "code": 200,
-            "state": "ok",
-            "message": "Running normally"
-        },
         "Lightning-Text v2": {
             "code": 200,
             "state": "ok",
-            "message": "Running normally"
         },
         "Music/SFX Generation": {
             "code": 200,
             "state": "ok",
-            "message": "Running normally"
-        }
     }
     overall_state = (
-        "ok" if all(s["state"] == "ok" for s in services.values())
-        else "degraded"
     )
     return JSONResponse(
@@ -444,10 +218,11 @@ async def get_status():
             "state": overall_state,
             "services": services,
             "notifications": notify,
-            "latest": "2.4.0"
-        }
     )
 @app.post("/gen/image")
 @app.get("/genimg/{prompt}")
 async def generate_image(
@@ -462,7 +237,9 @@ async def generate_image(
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
-    enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Image prompt")
     await check_image_rate_limit(request, authorization, x_client_id)
@@ -482,19 +259,17 @@ async def generate_image(
     print(f"[IMAGE GEN] Routing to model: {chosen_model}")
     url = f"https://gen.pollinations.ai/image/{quote(prompt, safe='')}?model={chosen_model}&key={PKEY2}"
-    async with httpx.AsyncClient(timeout = timeout) as client:
         response = await client.get(url)
     if response.status_code != 200:
         raise HTTPException(
-            status_code=500,
-            detail=f"Pollinations error: {response.status_code}"
         )
-    return Response(
-        content=response.content,
-        media_type="image/jpeg"
-    )
 @app.head("/models")
 @app.get("/models")
 async def get_models() -> List[Dict]:
@@ -511,22 +286,31 @@ async def get_models() -> List[Dict]:
         description = item.select_one("p.max-w-lg")
         sizes = [el.get_text(strip=True) for el in item.select("[x-test-size]")]
         pulls = item.select_one("[x-test-pull-count]")
-        tags = [t.get_text(strip=True) for t in item.select('span[class*="text-blue-600"]')]
         updated = item.select_one("[x-test-updated]")
         link = item.select_one("a")
-        models.append({
-            "name": name.get_text(strip=True) if name else "",
-            "description": description.get_text(strip=True) if description else "No description",
-            "sizes": sizes,
-            "pulls": pulls.get_text(strip=True) if pulls else "Unknown",
-            "tags": tags,
-            "updated": updated.get_text(strip=True) if updated else "Unknown",
-            "link": link.get("href") if link else None,
-        })
     return models
 @app.post("/gen/chat/completions")
 async def generate_text(
     request: Request,
@@ -549,11 +333,11 @@ async def generate_text(
         )
     prompt_text = extract_user_text(messages)
     uses_tools = (
         "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
     ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
     long_context = is_long_context(messages)
     code_present = contains_code(prompt_text)
     math_heavy = is_math_heavy(prompt_text)
@@ -562,26 +346,26 @@ async def generate_text(
     code_heavy = is_code_heavy(prompt_text, code_present, long_context)
     score = 0
     if long_context:
         score += 3
     if math_heavy:
         score += 3
     if structured_task:
         score += 2
     if code_present:
         score += 2
     if multi_q:
         score += 1
     for kw in REASONING_KEYWORDS:
         if kw in prompt_text:
             score += 1
     chosen_model = "meta-llama/llama-4-scout-17b-16e-instruct"
     provider = "groq"
     if score > 10:
@@ -594,11 +378,11 @@ async def generate_text(
         provider = "groq"
     elif code_present:
         if code_heavy and score >= 6:
             chosen_model = "gpt-oss-120b"
             provider = "cerebras"
         elif score >= 4:
             chosen_model = "llama-3.3-70b-versatile"
             provider = "groq"
@@ -621,7 +405,8 @@ async def generate_text(
     await check_chat_rate_limit(request, authorization, x_client_id)
     body["model"] = chosen_model
-    print(f"""
     [ADVANCED ROUTER]
       Score: {score}
       Uses tools: {uses_tools}
@@ -631,8 +416,8 @@ async def generate_text(
       Structured: {structured_task}
       Multi-question: {multi_q}
       → Selected: {chosen_model} ({provider})
-    """)
     stream = body.get("stream", False)
@@ -660,7 +445,7 @@ async def generate_text(
     if stream:
         body["stream"] = True
         async def event_generator():
             try:
                 async with httpx.AsyncClient(timeout=None) as client:
@@ -685,24 +470,24 @@ async def generate_text(
                                 .replace("\r", " ")
                             )
                             yield (
-                                "data: {\"error\": "
-                                f"\"Upstream provider error ({r.status_code}): {safe_error_payload}\""
                                 "}\n\n"
                             )
                             return
                         async for line in r.aiter_lines():
                             if line == "":
                                 yield "\n"
                                 continue
                             yield line + "\n"
             except asyncio.CancelledError:
                 return
             except Exception as e:
-                yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
         return StreamingResponse(
             event_generator(),
             media_type="text/event-stream",
@@ -728,13 +513,11 @@ async def generate_text(
                 "message": r.text[:1000],
             }
-        return JSONResponse(
-            status_code=r.status_code,
-            content=payload
-        )
     raise HTTPException(500, "Unknown provider routing error")
 @app.get("/gen/sfx/{prompt}")
 @app.post("/gen/sfx")
 async def gensfx(
@@ -748,7 +531,9 @@ async def gensfx(
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
-    enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt")
     await check_audio_rate_limit(request, authorization, x_client_id)
     url = f"https://gen.pollinations.ai/audio/{prompt}?model=elevenmusic&key={PKEY}"
     async with httpx.AsyncClient(timeout=None) as client:
@@ -765,13 +550,11 @@ async def gensfx(
                 "success": False,
                 "error": "Upstream music/sfx generation failed",
                 "status_code": response.status_code,
-                "message": body_text[:1000]
-            }
         )
-    return Response(
-        response.content,
-        media_type="audio/mpeg"
-    )
 @app.get("/gen/tts/{prompt}")
 @app.post("/gen/tts")
@@ -786,7 +569,9 @@ async def gensfx(
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
-    enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt")
     await check_audio_rate_limit(request, authorization, x_client_id)
     url = f"https://gen.pollinations.ai/audio/{prompt}?key={PKEY3}"
     async with httpx.AsyncClient(timeout=None) as client:
@@ -803,13 +588,12 @@ async def gensfx(
                 "success": False,
                 "error": "Upstream audio generation failed",
                 "status_code": response.status_code,
-                "message": body_text[:1000]
-            }
         )
-    return Response(
-        response.content,
-        media_type="audio/mpeg"
-    )
 @app.get("/gen/video/{prompt}")
 @app.post("/gen/video")
 @app.head("/gen/video")
@@ -824,25 +608,19 @@ async def genvideo_airforce(
             status_code=200,
             headers={
                 "Y-prompt": "string — required. The text prompt used to generate the video.",
                 "Y-ratio": "string — optional. Aspect ratio of the output video.",
                 "Y-ratio-values": "3:2,2:3,1:1",
                 "Y-ratio-default": "3:2",
                 "Y-mode": "string — optional. Controls generation style.",
                 "Y-mode-values": "normal,fun",
                 "Y-mode-default": "normal",
                 "Y-duration": "integer — optional. Duration in seconds (1–10).",
                 "Y-duration-default": "5",
                 "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
                 "Y-image_urls-max": "2",
                 "Y-response_format": "video/mp4",
-                "Y-model": "grok-video"
-            }
         )
     aspectRatio = "3:2"
@@ -863,7 +641,7 @@ async def genvideo_airforce(
         if ratio not in valid_ratios:
             raise HTTPException(
                 status_code=400,
-                detail=f"Invalid aspect ratio '{ratio}'. Must be one of 3:2, 2:3, or 1:1."
             )
         if ratio in ratios:
             aspectRatio = ratio
@@ -871,7 +649,7 @@ async def genvideo_airforce(
         if mode not in valid_modes:
             raise HTTPException(
                 status_code=400,
-                detail=f"Invalid mode '{mode}'. Must be 'normal' or 'fun'."
             )
         if mode in modes:
             inputMode = mode
@@ -889,13 +667,15 @@ async def genvideo_airforce(
             duration = 5
     prompt = normalize_prompt_value(prompt, "prompt")
-    enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt")
     await check_video_rate_limit(request, authorization, x_client_id)
     RATIO_MAP = {
-        "3:2":  "16:9",
-        "2:3":  "9:16",
-        "1:1":  "1:1",
     }
     pollinations_ratio = RATIO_MAP.get(aspectRatio, "16:9")
@@ -908,20 +688,20 @@ async def genvideo_airforce(
     }
     temp_assets = []
     if image_urls:
         processed_urls = []
         for img in image_urls[:2]:
             if is_base64_image(img):
                 image_id = save_base64_image(img)
                 temp_assets.append(image_id)
                 served_url = f"{request.base_url}asset-cdn/assets/{image_id}"
                 processed_urls.append(served_url)
             else:
                 processed_urls.append(img)
         params["image"] = "|".join(processed_urls)
     if inputMode == "fun":
@@ -933,7 +713,7 @@ async def genvideo_airforce(
     print(f"[VIDEO GEN] Pollinations URL: {url}")
     url = url + f"&key={PKEY}"
     resp = None
-    try:
         async with httpx.AsyncClient(timeout=600) as client:
             resp = await client.get(url)
     finally:
@@ -954,7 +734,7 @@ async def genvideo_airforce(
                 "error": "Upstream video generation failed",
                 "status_code": resp.status_code,
                 "message": body_text[:1000],
-            }
         )
     if not resp.content:
@@ -969,6 +749,7 @@ async def genvideo_airforce(
         },
     )
 AIRFORCE_KEY = os.getenv("AIRFORCE")
 AIRFORCE_VIDEO_MODEL = "grok-imagine-video"
 AIRFORCE_API_URL = "https://api.airforce/v1/images/generations"
@@ -995,31 +776,24 @@ async def genvideo_airforce(
         return Response(
             status_code=200,
             headers={
                 # Required field
                 "Y-prompt": "string — required. The text prompt used to generate the video.",
                 # Optional fields
                 "Y-ratio": "string — optional. Aspect ratio of the output video.",
                 "Y-ratio-values": "3:2,2:3,1:1",
                 "Y-ratio-default": "3:2",
                 "Y-mode": "string — optional. Controls generation style.",
                 "Y-mode-values": "normal,fun",
                 "Y-mode-default": "normal",
                 "Y-duration": "integer — optional. Duration in seconds.",
                 "Y-duration-default": "5",
                 "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
                 "Y-image_urls-max": "2",
                 # Response format
                 "Y-response_format": "video/mp4",
                 # Model info
-                "Y-model": "grok-imagine-video"
-            }
         )
     aspectRatio = "3:2"
@@ -1039,7 +813,7 @@ async def genvideo_airforce(
         if ratio not in valid_ratios:
             raise HTTPException(
                 status_code=400,
-                detail=f"Invalid aspect ratio {ratio}. Must be one of 3:2, 2:3, or 1:1. Default is 3:2"
             )
         if ratio in ratios:
             aspectRatio = ratio
@@ -1047,7 +821,7 @@ async def genvideo_airforce(
         if mode not in valid_modes:
             raise HTTPException(
                 status_code=400,
-                detail=f"Invalid mode {mode}. Must be 'normal' or 'fun'. Default is normal"
             )
         if mode in modes:
             inputMode = mode
@@ -1060,7 +834,9 @@ async def genvideo_airforce(
                 raise HTTPException(400, "You may provide at most two image URLs")
     prompt = normalize_prompt_value(prompt, "prompt")
-    enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt")
     await check_video_rate_limit(request, authorization, x_client_id)
     payload = {
@@ -1071,7 +847,7 @@ async def genvideo_airforce(
         "response_format": "b64_json",
         "sse": False,
         "mode": inputMode,
-        "aspectRatio": aspectRatio
     }
     if image_urls:
@@ -1082,9 +858,9 @@ async def genvideo_airforce(
             AIRFORCE_API_URL,
             headers={
                 "Authorization": f"Bearer {AIRFORCE_KEY}",
-                "Content-Type": "application/json"
             },
-            json=payload
         )
     if resp.status_code != 200:
@@ -1113,6 +889,7 @@ async def genvideo_airforce(
         },
     )
 @app.get("/subscription")
 async def get_subscription(authorization: Optional[str] = Header(None)):
     if not authorization or not authorization.startswith("Bearer "):
@@ -1136,7 +913,9 @@ async def get_usage(
     authorization: Optional[str] = Header(None),
     x_client_id: Optional[str] = Header(None),
 ):
-    plan_key, subject = await resolve_rate_limit_identity(request, authorization, x_client_id)
     plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
     usage = get_usage_snapshot_for_subject(plan_key, subject)
     return JSONResponse(
@@ -1149,6 +928,7 @@ async def get_usage(
         },
     )
 @app.get("/tier-config")
 async def tier_config():
     plans = []
@@ -1175,6 +955,7 @@ async def tier_config():
         },
     )
 @app.get("/tiers")
 async def tiers():
     paid_plans = []
@@ -1199,6 +980,7 @@ async def tiers():
         content=paid_plans,
     )
 @app.get("/portal")
 @app.post("/portal")
 async def redirect_to_protal(request: Request):
@@ -1217,8 +999,7 @@ async def redirect_to_protal(request: Request):
         return RedirectResponse(url=base_url, status_code=status.HTTP_302_FOUND)
     if request.method != "POST":
         return RedirectResponse(
-            url=f"{base_url}?prefilled_email={email}",
-            status_code=status.HTTP_302_FOUND
         )
     else:
-        return JSONResponse({"redirect_url": (base_url + "?prefilled_email=" + email)})

 import hashlib
 from fastapi import FastAPI, Request, HTTPException, status, Header
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import (
+    Response,
+    JSONResponse,
+    StreamingResponse,
+    RedirectResponse,
+)
 import httpx
 from bs4 import BeautifulSoup
 from typing import List, Dict, Any
 import re
 from random import randint
 from urllib.parse import quote
 import base64
+from helper.subscriptions import (
+    fetch_subscription,
+    normalize_plan_key,
+    TIER_CONFIG,
+    PLAN_ORDER,
+)
 from typing import Optional
+from helper.keywords import *
+from helper.assets import (
+    save_base64_image,
+    cleanup_image,
+    is_base64_image,
+    asset_router,
+)
+from helper.misc import (
+    extract_user_text,
+    calculate_messages_size,
+    is_long_context,
+    contains_code,
+    is_math_heavy,
+    is_structured_task,
+    multiple_questions,
+    is_code_heavy,
+    normalize_prompt_value,
+    enforce_prompt_size,
+    resolve_bound_subject,
+    get_usage_snapshot_for_subject,
+)
+from helper.ratelimit import (
+    enforce_rate_limit,
+    resolve_rate_limit_identity,
+    check_audio_rate_limit,
+    check_video_rate_limit,
+    check_image_rate_limit,
+    MAX_CHAT_PROMPT_BYTES,
+    MAX_CHAT_PROMPT_CHARS,
+    MAX_GROQ_PROMPT_BYTES,
+    MAX_GROQ_PROMPT_CHARS,
+    MAX_MEDIA_PROMPT_BYTES,
+    MAX_MEDIA_PROMPT_CHARS,
+)
 app = FastAPI()
     allow_headers=["*"],
 )
 app.include_router(asset_router)
+@app.get("/")
+async def reroute_to_home():
+    return RedirectResponse(
+        url="https://inference.js.org", status_code=status.HTTP_308_PERMANENT_REDIRECT
     )
+OLLAMA_LIBRARY_URL = "https://ollama.com/library"
 def is_complex_reasoning(prompt: str) -> bool:
             return True
     return False
+PKEY = os.getenv("POLLINATIONS_KEY", "")
 PKEY2 = os.getenv("POLLINATIONS2_KEY", "")
 PKEY3 = os.getenv("POLLINATIONS3_KEY", "")
     "zai-glm-4.7",
 ]
 async def check_chat_rate_limit(
     request: Request,
     authorization: Optional[str],
 ):
     return await enforce_rate_limit(request, authorization, "cloudChatDaily", client_id)
 @app.head("/status/sfx")
 async def head_sfx():
     return Response(
         headers={
             "Content-Type": "audio/mpeg",
             "Accept-Ranges": "bytes",
+        },
     )
 @app.head("/status/image")
 async def head_image():
     return Response(
         headers={
             "Content-Type": "image/jpeg",
             "Accept-Ranges": "bytes",
+        },
     )
 @app.head("/status/video")
 async def head_video():
     return Response(
         headers={
             "Content-Type": "video/mp4",
             "Accept-Ranges": "bytes",
+        },
     )
 @app.head("/status/text")
 async def head_text():
     return Response(
         },
     )
 @app.get("/status")
 async def get_status():
     notify = ""
     services = {
+        "Video Generation": {"code": 200, "state": "ok", "message": "Running normally"},
+        "Image Generation": {"code": 200, "state": "ok", "message": "Running normally"},
         "Lightning-Text v2": {
             "code": 200,
             "state": "ok",
+            "message": "Running normally",
         },
         "Music/SFX Generation": {
             "code": 200,
             "state": "ok",
+            "message": "Running normally",
+        },
     }
     overall_state = (
+        "ok" if all(s["state"] == "ok" for s in services.values()) else "degraded"
     )
     return JSONResponse(
             "state": overall_state,
             "services": services,
             "notifications": notify,
+            "latest": "2.4.0",
+        },
     )
 @app.post("/gen/image")
 @app.get("/genimg/{prompt}")
 async def generate_image(
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
+    enforce_prompt_size(
+        prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Image prompt"
+    )
     await check_image_rate_limit(request, authorization, x_client_id)
     print(f"[IMAGE GEN] Routing to model: {chosen_model}")
     url = f"https://gen.pollinations.ai/image/{quote(prompt, safe='')}?model={chosen_model}&key={PKEY2}"
+    async with httpx.AsyncClient(timeout=timeout) as client:
         response = await client.get(url)
     if response.status_code != 200:
         raise HTTPException(
+            status_code=500, detail=f"Pollinations error: {response.status_code}"
         )
+    return Response(content=response.content, media_type="image/jpeg")
 @app.head("/models")
 @app.get("/models")
 async def get_models() -> List[Dict]:
         description = item.select_one("p.max-w-lg")
         sizes = [el.get_text(strip=True) for el in item.select("[x-test-size]")]
         pulls = item.select_one("[x-test-pull-count]")
+        tags = [
+            t.get_text(strip=True) for t in item.select('span[class*="text-blue-600"]')
+        ]
         updated = item.select_one("[x-test-updated]")
         link = item.select_one("a")
+        models.append(
+            {
+                "name": name.get_text(strip=True) if name else "",
+                "description": (
+                    description.get_text(strip=True)
+                    if description
+                    else "No description"
+                ),
+                "sizes": sizes,
+                "pulls": pulls.get_text(strip=True) if pulls else "Unknown",
+                "tags": tags,
+                "updated": updated.get_text(strip=True) if updated else "Unknown",
+                "link": link.get("href") if link else None,
+            }
+        )
     return models
 @app.post("/gen/chat/completions")
 async def generate_text(
     request: Request,
         )
     prompt_text = extract_user_text(messages)
     uses_tools = (
         "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
     ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
     long_context = is_long_context(messages)
     code_present = contains_code(prompt_text)
     math_heavy = is_math_heavy(prompt_text)
     code_heavy = is_code_heavy(prompt_text, code_present, long_context)
     score = 0
     if long_context:
         score += 3
     if math_heavy:
         score += 3
     if structured_task:
         score += 2
     if code_present:
         score += 2
     if multi_q:
         score += 1
     for kw in REASONING_KEYWORDS:
         if kw in prompt_text:
             score += 1
     chosen_model = "meta-llama/llama-4-scout-17b-16e-instruct"
     provider = "groq"
     if score > 10:
         provider = "groq"
     elif code_present:
         if code_heavy and score >= 6:
             chosen_model = "gpt-oss-120b"
             provider = "cerebras"
         elif score >= 4:
             chosen_model = "llama-3.3-70b-versatile"
             provider = "groq"
     await check_chat_rate_limit(request, authorization, x_client_id)
     body["model"] = chosen_model
+    print(
+        f"""
     [ADVANCED ROUTER]
       Score: {score}
       Uses tools: {uses_tools}
       Structured: {structured_task}
       Multi-question: {multi_q}
       → Selected: {chosen_model} ({provider})
+    """
+    )
     stream = body.get("stream", False)
     if stream:
         body["stream"] = True
         async def event_generator():
             try:
                 async with httpx.AsyncClient(timeout=None) as client:
                                 .replace("\r", " ")
                             )
                             yield (
+                                'data: {"error": '
+                                f'"Upstream provider error ({r.status_code}): {safe_error_payload}"'
                                 "}\n\n"
                             )
                             return
                         async for line in r.aiter_lines():
                             if line == "":
                                 yield "\n"
                                 continue
                             yield line + "\n"
             except asyncio.CancelledError:
                 return
             except Exception as e:
+                yield f'data: {{"error": "{str(e)}"}}\n\n'
         return StreamingResponse(
             event_generator(),
             media_type="text/event-stream",
                 "message": r.text[:1000],
             }
+        return JSONResponse(status_code=r.status_code, content=payload)
     raise HTTPException(500, "Unknown provider routing error")
 @app.get("/gen/sfx/{prompt}")
 @app.post("/gen/sfx")
 async def gensfx(
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
+    enforce_prompt_size(
+        prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt"
+    )
     await check_audio_rate_limit(request, authorization, x_client_id)
     url = f"https://gen.pollinations.ai/audio/{prompt}?model=elevenmusic&key={PKEY}"
     async with httpx.AsyncClient(timeout=None) as client:
                 "success": False,
                 "error": "Upstream music/sfx generation failed",
                 "status_code": response.status_code,
+                "message": body_text[:1000],
+            },
         )
+    return Response(response.content, media_type="audio/mpeg")
 @app.get("/gen/tts/{prompt}")
 @app.post("/gen/tts")
         payload = await request.json()
         prompt = payload.get("prompt")
     prompt = normalize_prompt_value(prompt, "prompt")
+    enforce_prompt_size(
+        prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt"
+    )
     await check_audio_rate_limit(request, authorization, x_client_id)
     url = f"https://gen.pollinations.ai/audio/{prompt}?key={PKEY3}"
     async with httpx.AsyncClient(timeout=None) as client:
                 "success": False,
                 "error": "Upstream audio generation failed",
                 "status_code": response.status_code,
+                "message": body_text[:1000],
+            },
         )
+    return Response(response.content, media_type="audio/mpeg")
 @app.get("/gen/video/{prompt}")
 @app.post("/gen/video")
 @app.head("/gen/video")
             status_code=200,
             headers={
                 "Y-prompt": "string — required. The text prompt used to generate the video.",
                 "Y-ratio": "string — optional. Aspect ratio of the output video.",
                 "Y-ratio-values": "3:2,2:3,1:1",
                 "Y-ratio-default": "3:2",
                 "Y-mode": "string — optional. Controls generation style.",
                 "Y-mode-values": "normal,fun",
                 "Y-mode-default": "normal",
                 "Y-duration": "integer — optional. Duration in seconds (1–10).",
                 "Y-duration-default": "5",
                 "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
                 "Y-image_urls-max": "2",
                 "Y-response_format": "video/mp4",
+                "Y-model": "grok-video",
+            },
         )
     aspectRatio = "3:2"
         if ratio not in valid_ratios:
             raise HTTPException(
                 status_code=400,
+                detail=f"Invalid aspect ratio '{ratio}'. Must be one of 3:2, 2:3, or 1:1.",
             )
         if ratio in ratios:
             aspectRatio = ratio
         if mode not in valid_modes:
             raise HTTPException(
                 status_code=400,
+                detail=f"Invalid mode '{mode}'. Must be 'normal' or 'fun'.",
             )
         if mode in modes:
             inputMode = mode
             duration = 5
     prompt = normalize_prompt_value(prompt, "prompt")
+    enforce_prompt_size(
+        prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt"
+    )
     await check_video_rate_limit(request, authorization, x_client_id)
     RATIO_MAP = {
+        "3:2": "16:9",
+        "2:3": "9:16",
+        "1:1": "1:1",
     }
     pollinations_ratio = RATIO_MAP.get(aspectRatio, "16:9")
     }
     temp_assets = []
     if image_urls:
         processed_urls = []
         for img in image_urls[:2]:
             if is_base64_image(img):
                 image_id = save_base64_image(img)
                 temp_assets.append(image_id)
                 served_url = f"{request.base_url}asset-cdn/assets/{image_id}"
                 processed_urls.append(served_url)
             else:
                 processed_urls.append(img)
         params["image"] = "|".join(processed_urls)
     if inputMode == "fun":
     print(f"[VIDEO GEN] Pollinations URL: {url}")
     url = url + f"&key={PKEY}"
     resp = None
+    try:
         async with httpx.AsyncClient(timeout=600) as client:
             resp = await client.get(url)
     finally:
                 "error": "Upstream video generation failed",
                 "status_code": resp.status_code,
                 "message": body_text[:1000],
+            },
         )
     if not resp.content:
         },
     )
 AIRFORCE_KEY = os.getenv("AIRFORCE")
 AIRFORCE_VIDEO_MODEL = "grok-imagine-video"
 AIRFORCE_API_URL = "https://api.airforce/v1/images/generations"
         return Response(
             status_code=200,
             headers={
                 # Required field
                 "Y-prompt": "string — required. The text prompt used to generate the video.",
                 # Optional fields
                 "Y-ratio": "string — optional. Aspect ratio of the output video.",
                 "Y-ratio-values": "3:2,2:3,1:1",
                 "Y-ratio-default": "3:2",
                 "Y-mode": "string — optional. Controls generation style.",
                 "Y-mode-values": "normal,fun",
                 "Y-mode-default": "normal",
                 "Y-duration": "integer — optional. Duration in seconds.",
                 "Y-duration-default": "5",
                 "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
                 "Y-image_urls-max": "2",
                 # Response format
                 "Y-response_format": "video/mp4",
                 # Model info
+                "Y-model": "grok-imagine-video",
+            },
         )
     aspectRatio = "3:2"
         if ratio not in valid_ratios:
             raise HTTPException(
                 status_code=400,
+                detail=f"Invalid aspect ratio {ratio}. Must be one of 3:2, 2:3, or 1:1. Default is 3:2",
             )
         if ratio in ratios:
             aspectRatio = ratio
         if mode not in valid_modes:
             raise HTTPException(
                 status_code=400,
+                detail=f"Invalid mode {mode}. Must be 'normal' or 'fun'. Default is normal",
             )
         if mode in modes:
             inputMode = mode
                 raise HTTPException(400, "You may provide at most two image URLs")
     prompt = normalize_prompt_value(prompt, "prompt")
+    enforce_prompt_size(
+        prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt"
+    )
     await check_video_rate_limit(request, authorization, x_client_id)
     payload = {
         "response_format": "b64_json",
         "sse": False,
         "mode": inputMode,
+        "aspectRatio": aspectRatio,
     }
     if image_urls:
             AIRFORCE_API_URL,
             headers={
                 "Authorization": f"Bearer {AIRFORCE_KEY}",
+                "Content-Type": "application/json",
             },
+            json=payload,
         )
     if resp.status_code != 200:
         },
     )
 @app.get("/subscription")
 async def get_subscription(authorization: Optional[str] = Header(None)):
     if not authorization or not authorization.startswith("Bearer "):
     authorization: Optional[str] = Header(None),
     x_client_id: Optional[str] = Header(None),
 ):
+    plan_key, subject = await resolve_rate_limit_identity(
+        request, authorization, x_client_id
+    )
     plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
     usage = get_usage_snapshot_for_subject(plan_key, subject)
     return JSONResponse(
         },
     )
 @app.get("/tier-config")
 async def tier_config():
     plans = []
         },
     )
 @app.get("/tiers")
 async def tiers():
     paid_plans = []
         content=paid_plans,
     )
 @app.get("/portal")
 @app.post("/portal")
 async def redirect_to_protal(request: Request):
         return RedirectResponse(url=base_url, status_code=status.HTTP_302_FOUND)
     if request.method != "POST":
         return RedirectResponse(
+            url=f"{base_url}?prefilled_email={email}", status_code=status.HTTP_302_FOUND
         )
     else:
+        return JSONResponse({"redirect_url": (base_url + "?prefilled_email=" + email)})

assets.py → helper/assets.py RENAMED Viewed

File without changes

keywords.py → helper/keywords.py RENAMED Viewed

File without changes

helper/misc.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import time
+import asyncio
+from helper.subscriptions import USAGE_PERIODS, usage_locks, usage_store, TIER_CONFIG, client_subject_bindings, CLIENT_BIND_TTL_SECONDS, MAX_CLIENT_ID_LENGTH
+from fastapi import Request, HTTPException
+from typing import Optional, Dict, Any, List
+import re
+import hashlib
def extract_user_text(messages: list) -> str:
    """Join the text of every user-role message into one lowercased string.

    Entries that are not dicts, or whose ``role`` is not ``"user"``, are
    ignored. Each remaining message's ``content`` is flattened with
    ``message_content_to_text`` and the pieces are joined with single spaces.
    """
    user_chunks = []
    for entry in messages:
        if not isinstance(entry, dict):
            continue
        if entry.get("role") != "user":
            continue
        user_chunks.append(message_content_to_text(entry.get("content")))
    return " ".join(user_chunks).lower()
def get_usage_period_key(metric: str) -> str:
    """Return the key identifying the current UTC usage window for ``metric``.

    Metrics whose period in ``USAGE_PERIODS`` is ``"weekly"`` get an ISO
    year/week key such as ``2024-W05``. Every other metric, including ones
    missing from ``USAGE_PERIODS`` (treated as ``"daily"``), gets a
    ``YYYY-MM-DD`` date key.
    """
    utc_now = time.gmtime()
    if USAGE_PERIODS.get(metric, "daily") == "weekly":
        # %G is the ISO-8601 year and %V the zero-padded ISO week number.
        return time.strftime("%G-W%V", utc_now)
    return time.strftime("%Y-%m-%d", utc_now)
def sanitize_client_id(raw_client_id: Optional[str]) -> Optional[str]:
    """Validate a caller-supplied client id and return its trimmed form.

    Returns ``None`` when the value is not a string, is blank after
    stripping whitespace, is longer than ``MAX_CLIENT_ID_LENGTH``, or
    contains anything other than ASCII letters, digits, ``.``, ``_``, ``:``
    or ``-``.
    """
    if not isinstance(raw_client_id, str):
        return None

    candidate = raw_client_id.strip()
    # Blank check comes first so an empty id is rejected before the length test.
    if not candidate:
        return None
    if len(candidate) > MAX_CLIENT_ID_LENGTH:
        return None
    if re.match(r"^[A-Za-z0-9._:-]+$", candidate) is None:
        return None
    return candidate
def get_usage_lock(metric: str, subject: str) -> asyncio.Lock:
    """Return the lock guarding the ``(metric, subject)`` usage counter.

    Locks live in the two-level ``usage_locks`` mapping
    (metric -> subject -> lock). Missing levels are created on first use, so
    repeated calls with the same arguments return the same lock object.
    """
    per_metric = usage_locks.get(metric)
    if per_metric is None:
        per_metric = usage_locks[metric] = {}

    existing = per_metric.get(subject)
    if existing is not None:
        return existing

    # Only build a new Lock when this subject has none yet.
    fresh = asyncio.Lock()
    per_metric[subject] = fresh
    return fresh
def build_default_subject(request: Request, client_id: Optional[str]) -> str:
    """Derive a rate-limit subject for a caller without an account identity.

    With a truthy ``client_id`` the subject is ``client:<hash>``, where the
    hash is the first 24 hex characters of the id's SHA-256. Otherwise the
    subject is ``anon:<host>:<ua-hash>``, built from the request's client
    host (``"unknown"`` when absent) and the first 12 hex characters of the
    User-Agent's SHA-256 (``"noua"`` when the header is missing or empty).
    """

    def _short_sha256(value: str, length: int) -> str:
        # Truncated hex digest; keeps raw ids and user agents out of the key.
        return hashlib.sha256(value.encode("utf-8")).hexdigest()[:length]

    if client_id:
        return f"client:{_short_sha256(client_id, 24)}"

    host = request.client.host if request.client else "unknown"
    user_agent = request.headers.get("user-agent", "")
    ua_hash = _short_sha256(user_agent, 12) if user_agent else "noua"
    return f"anon:{host}:{ua_hash}"
def bind_client_subject(client_id: Optional[str], subject: str, plan_key: str):
    """Record which subject and plan a client id most recently resolved to.

    Does nothing when ``client_id`` is falsy. Otherwise any existing entry in
    ``client_subject_bindings`` is overwritten, and the new entry expires
    ``CLIENT_BIND_TTL_SECONDS`` seconds from now.
    """
    if client_id:
        binding = dict(
            subject=subject,
            plan_key=plan_key,
            expires_at=time.time() + CLIENT_BIND_TTL_SECONDS,
        )
        client_subject_bindings[client_id] = binding
def resolve_bound_subject(client_id: Optional[str], fallback_subject: str) -> str:
    """Return the subject bound to ``client_id``, or ``fallback_subject``.

    The fallback is returned when ``client_id`` is falsy, when no binding is
    stored for it, or when the stored binding has expired. An expired binding
    is removed from ``client_subject_bindings`` as a side effect.
    """
    binding = client_subject_bindings.get(client_id) if client_id else None
    if not binding:
        return fallback_subject

    if time.time() < binding.get("expires_at", 0):
        return binding.get("subject", fallback_subject)

    # Expired: drop the stale entry so later lookups skip it.
    client_subject_bindings.pop(client_id, None)
    return fallback_subject
def normalize_prompt_value(prompt: Optional[str], field_name: str = "prompt") -> str:
    """Return ``prompt`` with surrounding whitespace removed.

    Raises:
        HTTPException: status 400 with detail ``"<field_name> is required"``
            when ``prompt`` is not a string or is blank after stripping.
    """
    cleaned = prompt.strip() if isinstance(prompt, str) else ""
    if cleaned:
        return cleaned
    raise HTTPException(status_code=400, detail=f"{field_name} is required")
def enforce_prompt_size(prompt: str, max_chars: int, max_bytes: int, context: str):
    """Reject ``prompt`` when it exceeds either size ceiling.

    The prompt is measured both in characters and in UTF-8 encoded bytes.
    ``context`` is a human-readable label used as the subject of the error
    message.

    Raises:
        HTTPException: status 413 when the character count is above
            ``max_chars`` or the byte count is above ``max_bytes``.
    """
    n_chars = len(prompt)
    n_bytes = len(prompt.encode("utf-8"))

    if n_chars <= max_chars and n_bytes <= max_bytes:
        return

    message = (
        f"{context} is too large ({n_chars} chars, {n_bytes} bytes). "
        f"Max allowed is {max_chars} chars or {max_bytes} bytes."
    )
    raise HTTPException(status_code=413, detail=message)
def message_content_to_text(content: Any) -> str:
    """Flatten a chat message ``content`` value into plain text.

    A string is returned unchanged. A list contributes its string items and
    the ``"text"`` value of its dict items (only when that value is a
    string), joined with single spaces. Any other input yields ``""``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    def _fragments():
        # Yield only the textual pieces; every other item is skipped silently.
        for part in content:
            if isinstance(part, str):
                yield part
            elif isinstance(part, dict):
                text = part.get("text")
                if isinstance(text, str):
                    yield text

    return " ".join(_fragments())
def calculate_messages_size(messages: list) -> tuple[int, int]:
    """Total the text size of a message list.

    Returns ``(characters, utf8_bytes)`` summed over the flattened
    ``content`` of every dict entry in ``messages``. Non-dict entries are
    ignored, and messages with no text contribute nothing.
    """
    texts = (
        message_content_to_text(msg.get("content"))
        for msg in messages
        if isinstance(msg, dict)
    )
    char_total = 0
    byte_total = 0
    for text in texts:
        char_total += len(text)
        byte_total += len(text.encode("utf-8"))
    return char_total, byte_total
def get_usage_snapshot_for_subject(plan_key: str, subject: str) -> Dict[str, Dict[str, Any]]:
    """Report ``subject``'s usage of every tracked metric under a plan.

    The plan is looked up in ``TIER_CONFIG`` and falls back to the ``"free"``
    entry when the key is unknown. For each metric in ``usage_store`` the
    result holds the plan limit (``None`` when the plan sets none), the count
    used in the current window, the remaining allowance (``None`` when there
    is no limit), the window key and the period name. A stored count from an
    earlier window is reported as zero.
    """
    plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
    limits = plan.get("limits", {})

    report: Dict[str, Dict[str, Any]] = {}
    for metric, bucket in usage_store.items():
        cap = limits.get(metric)
        window = get_usage_period_key(metric)
        record = bucket.get(subject)

        # Only a record from the current window counts toward usage.
        in_window = bool(record) and record.get("window") == window
        consumed = max(0, int(record.get("count", 0))) if in_window else 0

        report[metric] = {
            "limit": cap,
            "used": consumed,
            "remaining": max(0, int(cap) - consumed) if cap is not None else None,
            "window": window,
            "period": USAGE_PERIODS.get(metric, "daily"),
        }
    return report

helper/ratelimit.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import time
+from typing import Optional, Dict
+from fastapi import HTTPException, Request
+from helper.misc import sanitize_client_id, get_usage_lock, get_usage_period_key, build_default_subject, bind_client_subject, resolve_bound_subject
+from helper.subscriptions import fetch_subscription, usage_store, normalize_plan_key, TIER_CONFIG
+import os
# Lifetime, in seconds, of an entry in identity_cache.
IDENTITY_CACHE_TTL_SECONDS = 60
# In-memory cache keyed by bearer token; resolve_rate_limit_identity stores the
# resolved plan key, subject and expiry time here.
identity_cache = {}
# NOTE(review): the next three names are not referenced anywhere else in this
# module. helper/misc.py imports identically named objects from
# helper.subscriptions instead -- confirm which module is meant to own them.
# Default binding lifetime is 8 days, overridable via the environment.
CLIENT_BIND_TTL_SECONDS = int(
    os.getenv("CLIENT_BIND_TTL_SECONDS", str(8 * 24 * 60 * 60))
)
MAX_CLIENT_ID_LENGTH = 128
client_subject_bindings = {}
# Prompt-size ceilings, each overridable through the environment variable of
# the same name. The *_CHARS / *_BYTES pairs are presumably character and
# UTF-8 byte limits, matching how app.py passes the MEDIA pair to
# enforce_prompt_size -- TODO confirm for the CHAT and GROQ pairs.
MAX_CHAT_PROMPT_CHARS = int(os.getenv("MAX_CHAT_PROMPT_CHARS", "120000"))
MAX_CHAT_PROMPT_BYTES = int(os.getenv("MAX_CHAT_PROMPT_BYTES", "500000"))
MAX_GROQ_PROMPT_CHARS = int(os.getenv("MAX_GROQ_PROMPT_CHARS", "90000"))
MAX_GROQ_PROMPT_BYTES = int(os.getenv("MAX_GROQ_PROMPT_BYTES", "350000"))
MAX_MEDIA_PROMPT_CHARS = int(os.getenv("MAX_MEDIA_PROMPT_CHARS", "4000"))
MAX_MEDIA_PROMPT_BYTES = int(os.getenv("MAX_MEDIA_PROMPT_BYTES", "16000"))
async def resolve_rate_limit_identity(
    request: Request,
    authorization: Optional[str],
    client_id: Optional[str] = None,
) -> tuple[str, str]:
    """Work out which plan and usage subject a request is counted against.

    Returns ``(plan_key, subject)``.

    Callers without a usable ``Bearer`` token, and callers whose subscription
    lookup raises or returns an error, are treated as ``"free"``. Their
    subject is whatever was previously bound to their client id, or else the
    default client/anonymous subject.

    For a valid token the subscription is fetched, or read from
    ``identity_cache`` while the cached entry is fresh. The subject becomes
    ``user:<lowercased email>`` when the subscription carries an email, and
    the default subject otherwise. The result is cached for
    ``IDENTITY_CACHE_TTL_SECONDS`` and bound to the sanitized client id.
    """
    now = time.time()
    safe_client_id = sanitize_client_id(client_id)
    anonymous_subject = build_default_subject(request, safe_client_id)

    def _as_free() -> tuple[str, str]:
        # Shared fallback for every "cannot identify the caller" branch.
        return "free", resolve_bound_subject(safe_client_id, anonymous_subject)

    if not (authorization and authorization.startswith("Bearer ")):
        return _as_free()
    token = authorization.split(" ", 1)[1].strip()
    if not token:
        return _as_free()

    hit = identity_cache.get(token)
    if hit and hit.get("expires_at", 0) > now:
        plan_key = hit.get("plan_key", "free")
        subject = hit.get("subject", anonymous_subject)
    else:
        try:
            sub = await fetch_subscription(token)
        except Exception:
            # Best effort: a failed lookup downgrades the caller to the free tier.
            return _as_free()
        if not isinstance(sub, dict) or sub.get("error"):
            return _as_free()

        email = sub.get("email")
        if isinstance(email, str) and email.strip():
            subject = f"user:{email.strip().lower()}"
        else:
            subject = anonymous_subject
        plan_key = normalize_plan_key(sub.get("plan_key"))
        identity_cache[token] = {
            "plan_key": plan_key,
            "subject": subject,
            "expires_at": now + IDENTITY_CACHE_TTL_SECONDS,
        }

    bind_client_subject(safe_client_id, subject, plan_key)
    return plan_key, subject
async def enforce_rate_limit(
    request: Request,
    authorization: Optional[str],
    metric: str,
    client_id: Optional[str] = None,
) -> Dict[str, Optional[int | str]]:
    """Count one use of ``metric`` for the caller, enforcing the plan limit.

    The caller's plan and subject come from ``resolve_rate_limit_identity``.
    The counter is updated under the per-(metric, subject) lock, and it starts
    again from zero whenever the stored window differs from the current one.

    Returns a dict with ``plan_key``, ``remaining`` (``None`` when the plan
    sets no limit for the metric), ``used`` and ``window``.

    Raises:
        HTTPException: status 500 when ``metric`` is not tracked in
            ``usage_store``; status 429 when the limit is already reached.
    """
    if metric not in usage_store:
        raise HTTPException(status_code=500, detail=f"Unknown limit metric: {metric}")

    plan_key, subject = await resolve_rate_limit_identity(
        request, authorization, client_id
    )
    plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
    limit = plan.get("limits", {}).get(metric)
    window_key = get_usage_period_key(metric)

    async with get_usage_lock(metric, subject):
        counters = usage_store[metric]
        record = counters.get(subject)
        if not record or record.get("window") != window_key:
            # No counter yet, or one left over from a previous window.
            record = counters[subject] = {"window": window_key, "count": 0}

        if limit is not None and record["count"] >= int(limit):
            raise HTTPException(
                status_code=429,
                detail=f"{metric} limit reached for {plan.get('name', 'current plan')}",
            )

        record["count"] += 1
        used = record["count"]
        return {
            "plan_key": plan_key,
            "remaining": None if limit is None else max(0, int(limit) - used),
            "used": used,
            "window": window_key,
        }
async def check_audio_rate_limit(
    request: Request,
    authorization: Optional[str],
    client_id: Optional[str] = None,
):
    """Count one audio generation against the caller's ``audioWeekly`` quota.

    Delegates to ``enforce_rate_limit`` and discards its result; any
    exception it raises propagates to the caller.
    """
    await enforce_rate_limit(
        request,
        authorization,
        metric="audioWeekly",
        client_id=client_id,
    )
async def check_image_rate_limit(
    request: Request,
    authorization: Optional[str],
    client_id: Optional[str] = None,
):
    """Count one image generation against the caller's ``imagesDaily`` quota.

    Delegates to ``enforce_rate_limit`` and discards its result; any
    exception it raises propagates to the caller.
    """
    await enforce_rate_limit(
        request,
        authorization,
        metric="imagesDaily",
        client_id=client_id,
    )
async def check_video_rate_limit(
    request: Request,
    authorization: Optional[str],
    client_id: Optional[str] = None,
):
    """Count one video generation against the caller's ``videosDaily`` quota.

    Delegates to ``enforce_rate_limit`` and discards its result; any
    exception it raises propagates to the caller.
    """
    await enforce_rate_limit(
        request,
        authorization,
        metric="videosDaily",
        client_id=client_id,
    )

subscriptions.py → helper/subscriptions.py RENAMED Viewed

File without changes