R.C.M. commited on
Commit
99d08e9
·
1 Parent(s): 4a46cf8

Improved structure

Browse files
app.py CHANGED
@@ -3,7 +3,12 @@ import time
3
  import hashlib
4
  from fastapi import FastAPI, Request, HTTPException, status, Header
5
  from fastapi.middleware.cors import CORSMiddleware
6
- from fastapi.responses import Response, JSONResponse, StreamingResponse, RedirectResponse
 
 
 
 
 
7
  import httpx
8
  from bs4 import BeautifulSoup
9
  from typing import List, Dict, Any
@@ -11,12 +16,49 @@ import asyncio
11
  import re
12
  from random import randint
13
  from urllib.parse import quote
14
- import uuid
15
  import base64
16
- from subscriptions import fetch_subscription, normalize_plan_key, usage_locks, usage_store, USAGE_PERIODS, TIER_CONFIG, PLAN_ORDER
 
 
 
 
 
17
  from typing import Optional
18
- from keywords import *
19
- from assets import save_base64_image, cleanup_image, is_base64_image, asset_router
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  app = FastAPI()
22
 
@@ -27,266 +69,16 @@ app.add_middleware(
27
  allow_headers=["*"],
28
  )
29
  app.include_router(asset_router)
30
- @app.get("/")
31
- async def reroute_to_status():
32
- return RedirectResponse(url="https://inference.js.org", status_code=status.HTTP_308_PERMANENT_REDIRECT)
33
 
34
- OLLAMA_LIBRARY_URL = "https://ollama.com/library"
35
 
36
- IDENTITY_CACHE_TTL_SECONDS = 60
37
- identity_cache = {}
38
- CLIENT_BIND_TTL_SECONDS = int(os.getenv("CLIENT_BIND_TTL_SECONDS", str(8 * 24 * 60 * 60)))
39
- MAX_CLIENT_ID_LENGTH = 128
40
- client_subject_bindings = {}
41
-
42
- MAX_CHAT_PROMPT_CHARS = int(os.getenv("MAX_CHAT_PROMPT_CHARS", "120000"))
43
- MAX_CHAT_PROMPT_BYTES = int(os.getenv("MAX_CHAT_PROMPT_BYTES", "500000"))
44
- MAX_GROQ_PROMPT_CHARS = int(os.getenv("MAX_GROQ_PROMPT_CHARS", "90000"))
45
- MAX_GROQ_PROMPT_BYTES = int(os.getenv("MAX_GROQ_PROMPT_BYTES", "350000"))
46
- MAX_MEDIA_PROMPT_CHARS = int(os.getenv("MAX_MEDIA_PROMPT_CHARS", "4000"))
47
- MAX_MEDIA_PROMPT_BYTES = int(os.getenv("MAX_MEDIA_PROMPT_BYTES", "16000"))
48
-
49
- def extract_user_text(messages: list) -> str:
50
- return " ".join(
51
- message_content_to_text(m.get("content"))
52
- for m in messages
53
- if isinstance(m, dict) and m.get("role") == "user"
54
- ).lower()
55
-
56
- def get_usage_period_key(metric: str) -> str:
57
- now = time.gmtime()
58
- period = USAGE_PERIODS.get(metric, "daily")
59
- if period == "weekly":
60
- iso_year, iso_week, _ = time.strftime("%G %V %u", now).split(" ")
61
- return f"{iso_year}-W{iso_week}"
62
- return time.strftime("%Y-%m-%d", now)
63
-
64
-
65
- def sanitize_client_id(raw_client_id: Optional[str]) -> Optional[str]:
66
- if not isinstance(raw_client_id, str):
67
- return None
68
- trimmed = raw_client_id.strip()
69
- if not trimmed or len(trimmed) > MAX_CLIENT_ID_LENGTH:
70
- return None
71
- if not re.match(r"^[A-Za-z0-9._:-]+$", trimmed):
72
- return None
73
- return trimmed
74
-
75
-
76
- def get_usage_lock(metric: str, subject: str) -> asyncio.Lock:
77
- metric_locks = usage_locks.get(metric)
78
- if metric_locks is None:
79
- metric_locks = {}
80
- usage_locks[metric] = metric_locks
81
- lock = metric_locks.get(subject)
82
- if lock is None:
83
- lock = asyncio.Lock()
84
- metric_locks[subject] = lock
85
- return lock
86
-
87
-
88
- def build_default_subject(request: Request, client_id: Optional[str]) -> str:
89
- if client_id:
90
- client_hash = hashlib.sha256(client_id.encode("utf-8")).hexdigest()[:24]
91
- return f"client:{client_hash}"
92
- host = request.client.host if request.client else "unknown"
93
- user_agent = request.headers.get("user-agent", "")
94
- ua_hash = (
95
- hashlib.sha256(user_agent.encode("utf-8")).hexdigest()[:12]
96
- if user_agent
97
- else "noua"
98
  )
99
- return f"anon:{host}:{ua_hash}"
100
-
101
-
102
- def bind_client_subject(client_id: Optional[str], subject: str, plan_key: str):
103
- if not client_id:
104
- return
105
- client_subject_bindings[client_id] = {
106
- "subject": subject,
107
- "plan_key": plan_key,
108
- "expires_at": time.time() + CLIENT_BIND_TTL_SECONDS,
109
- }
110
-
111
-
112
- def resolve_bound_subject(client_id: Optional[str], fallback_subject: str) -> str:
113
- if not client_id:
114
- return fallback_subject
115
- bound = client_subject_bindings.get(client_id)
116
- if not bound:
117
- return fallback_subject
118
- if bound.get("expires_at", 0) <= time.time():
119
- client_subject_bindings.pop(client_id, None)
120
- return fallback_subject
121
- return bound.get("subject", fallback_subject)
122
-
123
-
124
- def normalize_prompt_value(prompt: Optional[str], field_name: str = "prompt") -> str:
125
- if not isinstance(prompt, str):
126
- raise HTTPException(status_code=400, detail=f"{field_name} is required")
127
- normalized = prompt.strip()
128
- if not normalized:
129
- raise HTTPException(status_code=400, detail=f"{field_name} is required")
130
- return normalized
131
-
132
-
133
- def enforce_prompt_size(prompt: str, max_chars: int, max_bytes: int, context: str):
134
- char_len = len(prompt)
135
- byte_len = len(prompt.encode("utf-8"))
136
- if char_len > max_chars or byte_len > max_bytes:
137
- raise HTTPException(
138
- status_code=413,
139
- detail=(
140
- f"{context} is too large ({char_len} chars, {byte_len} bytes). "
141
- f"Max allowed is {max_chars} chars or {max_bytes} bytes."
142
- ),
143
- )
144
-
145
-
146
- def message_content_to_text(content: Any) -> str:
147
- if isinstance(content, str):
148
- return content
149
- if isinstance(content, list):
150
- parts: List[str] = []
151
- for item in content:
152
- if isinstance(item, str):
153
- parts.append(item)
154
- continue
155
- if isinstance(item, dict):
156
- text = item.get("text")
157
- if isinstance(text, str):
158
- parts.append(text)
159
- return " ".join(parts)
160
- return ""
161
-
162
-
163
- def calculate_messages_size(messages: list) -> tuple[int, int]:
164
- total_chars = 0
165
- total_bytes = 0
166
- for message in messages:
167
- if not isinstance(message, dict):
168
- continue
169
- text = message_content_to_text(message.get("content"))
170
- if not text:
171
- continue
172
- total_chars += len(text)
173
- total_bytes += len(text.encode("utf-8"))
174
- return total_chars, total_bytes
175
-
176
-
177
- def get_usage_snapshot_for_subject(plan_key: str, subject: str) -> Dict[str, Dict[str, Any]]:
178
- plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
179
- plan_limits = plan.get("limits", {})
180
- snapshot: Dict[str, Dict[str, Any]] = {}
181
- for metric in usage_store.keys():
182
- limit = plan_limits.get(metric)
183
- window_key = get_usage_period_key(metric)
184
- entry = usage_store[metric].get(subject)
185
- used = 0
186
- if entry and entry.get("window") == window_key:
187
- used = max(0, int(entry.get("count", 0)))
188
- remaining = None if limit is None else max(0, int(limit) - used)
189
- snapshot[metric] = {
190
- "limit": limit,
191
- "used": used,
192
- "remaining": remaining,
193
- "window": window_key,
194
- "period": USAGE_PERIODS.get(metric, "daily"),
195
- }
196
- return snapshot
197
-
198
-
199
- async def resolve_rate_limit_identity(
200
- request: Request,
201
- authorization: Optional[str],
202
- client_id: Optional[str] = None,
203
- ) -> tuple[str, str]:
204
- now = time.time()
205
- normalized_client_id = sanitize_client_id(client_id)
206
- default_subject = build_default_subject(request, normalized_client_id)
207
- if not authorization or not authorization.startswith("Bearer "):
208
- return "free", resolve_bound_subject(normalized_client_id, default_subject)
209
-
210
- token = authorization.split(" ", 1)[1].strip()
211
- if not token:
212
- return "free", resolve_bound_subject(normalized_client_id, default_subject)
213
-
214
- cached = identity_cache.get(token)
215
- if cached and cached.get("expires_at", 0) > now:
216
- plan_key = cached.get("plan_key", "free")
217
- subject = cached.get("subject", default_subject)
218
- bind_client_subject(normalized_client_id, subject, plan_key)
219
- return plan_key, subject
220
-
221
- try:
222
- sub = await fetch_subscription(token)
223
- except Exception:
224
- return "free", resolve_bound_subject(normalized_client_id, default_subject)
225
-
226
- if not isinstance(sub, dict) or sub.get("error"):
227
- return "free", resolve_bound_subject(normalized_client_id, default_subject)
228
-
229
- email = sub.get("email")
230
- if isinstance(email, str) and email.strip():
231
- subject = f"user:{email.strip().lower()}"
232
- else:
233
- subject = default_subject
234
-
235
- plan_key = normalize_plan_key(sub.get("plan_key"))
236
- identity_cache[token] = {
237
- "plan_key": plan_key,
238
- "subject": subject,
239
- "expires_at": now + IDENTITY_CACHE_TTL_SECONDS,
240
- }
241
- bind_client_subject(normalized_client_id, subject, plan_key)
242
- return plan_key, subject
243
-
244
-
245
- async def enforce_rate_limit(
246
- request: Request,
247
- authorization: Optional[str],
248
- metric: str,
249
- client_id: Optional[str] = None,
250
- ) -> Dict[str, Optional[int | str]]:
251
- if metric not in usage_store:
252
- raise HTTPException(status_code=500, detail=f"Unknown limit metric: {metric}")
253
-
254
- plan_key, subject = await resolve_rate_limit_identity(request, authorization, client_id)
255
- plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
256
- plan_limits = plan.get("limits", {})
257
- limit = plan_limits.get(metric)
258
-
259
- window_key = get_usage_period_key(metric)
260
- lock = get_usage_lock(metric, subject)
261
- async with lock:
262
- bucket = usage_store[metric]
263
- entry = bucket.get(subject)
264
- if not entry or entry.get("window") != window_key:
265
- entry = {"window": window_key, "count": 0}
266
- bucket[subject] = entry
267
-
268
- if limit is not None and entry["count"] >= int(limit):
269
- raise HTTPException(
270
- status_code=429,
271
- detail=f"{metric} limit reached for {plan.get('name', 'current plan')}",
272
- )
273
-
274
- entry["count"] += 1
275
- remaining = None if limit is None else max(0, int(limit) - entry["count"])
276
- return {
277
- "plan_key": plan_key,
278
- "remaining": remaining,
279
- "used": entry["count"],
280
- "window": window_key,
281
- }
282
 
283
 
284
- async def check_audio_rate_limit(
285
- request: Request,
286
- authorization: Optional[str],
287
- client_id: Optional[str] = None,
288
- ):
289
- await enforce_rate_limit(request, authorization, "audioWeekly", client_id)
290
 
291
 
292
  def is_complex_reasoning(prompt: str) -> bool:
@@ -317,22 +109,7 @@ def is_cinematic_image_prompt(prompt: str) -> bool:
317
  return True
318
  return False
319
 
320
- async def check_image_rate_limit(
321
- request: Request,
322
- authorization: Optional[str],
323
- client_id: Optional[str] = None,
324
- ):
325
- await enforce_rate_limit(request, authorization, "imagesDaily", client_id)
326
-
327
-
328
- async def check_video_rate_limit(
329
- request: Request,
330
- authorization: Optional[str],
331
- client_id: Optional[str] = None,
332
- ):
333
- await enforce_rate_limit(request, authorization, "videosDaily", client_id)
334
-
335
- PKEY = os.getenv("POLLINATIONS_KEY", "")
336
  PKEY2 = os.getenv("POLLINATIONS2_KEY", "")
337
  PKEY3 = os.getenv("POLLINATIONS3_KEY", "")
338
 
@@ -360,6 +137,7 @@ CEREBRAS_MODELS = [
360
  "zai-glm-4.7",
361
  ]
362
 
 
363
  async def check_chat_rate_limit(
364
  request: Request,
365
  authorization: Optional[str],
@@ -367,6 +145,7 @@ async def check_chat_rate_limit(
367
  ):
368
  return await enforce_rate_limit(request, authorization, "cloudChatDaily", client_id)
369
 
 
370
  @app.head("/status/sfx")
371
  async def head_sfx():
372
  return Response(
@@ -374,9 +153,10 @@ async def head_sfx():
374
  headers={
375
  "Content-Type": "audio/mpeg",
376
  "Accept-Ranges": "bytes",
377
- }
378
  )
379
 
 
380
  @app.head("/status/image")
381
  async def head_image():
382
  return Response(
@@ -384,9 +164,10 @@ async def head_image():
384
  headers={
385
  "Content-Type": "image/jpeg",
386
  "Accept-Ranges": "bytes",
387
- }
388
  )
389
 
 
390
  @app.head("/status/video")
391
  async def head_video():
392
  return Response(
@@ -394,9 +175,10 @@ async def head_video():
394
  headers={
395
  "Content-Type": "video/mp4",
396
  "Accept-Ranges": "bytes",
397
- }
398
  )
399
 
 
400
  @app.head("/status/text")
401
  async def head_text():
402
  return Response(
@@ -407,35 +189,27 @@ async def head_text():
407
  },
408
  )
409
 
 
410
  @app.get("/status")
411
  async def get_status():
412
  notify = ""
413
  services = {
414
- "Video Generation": {
415
- "code": 200,
416
- "state": "ok",
417
- "message": "Running normally"
418
- },
419
- "Image Generation": {
420
- "code": 200,
421
- "state": "ok",
422
- "message": "Running normally"
423
- },
424
  "Lightning-Text v2": {
425
  "code": 200,
426
  "state": "ok",
427
- "message": "Running normally"
428
  },
429
  "Music/SFX Generation": {
430
  "code": 200,
431
  "state": "ok",
432
- "message": "Running normally"
433
- }
434
  }
435
 
436
  overall_state = (
437
- "ok" if all(s["state"] == "ok" for s in services.values())
438
- else "degraded"
439
  )
440
 
441
  return JSONResponse(
@@ -444,10 +218,11 @@ async def get_status():
444
  "state": overall_state,
445
  "services": services,
446
  "notifications": notify,
447
- "latest": "2.4.0"
448
- }
449
  )
450
 
 
451
  @app.post("/gen/image")
452
  @app.get("/genimg/{prompt}")
453
  async def generate_image(
@@ -462,7 +237,9 @@ async def generate_image(
462
  payload = await request.json()
463
  prompt = payload.get("prompt")
464
  prompt = normalize_prompt_value(prompt, "prompt")
465
- enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Image prompt")
 
 
466
 
467
  await check_image_rate_limit(request, authorization, x_client_id)
468
 
@@ -482,19 +259,17 @@ async def generate_image(
482
  print(f"[IMAGE GEN] Routing to model: {chosen_model}")
483
 
484
  url = f"https://gen.pollinations.ai/image/{quote(prompt, safe='')}?model={chosen_model}&key={PKEY2}"
485
- async with httpx.AsyncClient(timeout = timeout) as client:
486
  response = await client.get(url)
487
 
488
  if response.status_code != 200:
489
  raise HTTPException(
490
- status_code=500,
491
- detail=f"Pollinations error: {response.status_code}"
492
  )
493
 
494
- return Response(
495
- content=response.content,
496
- media_type="image/jpeg"
497
- )
498
  @app.head("/models")
499
  @app.get("/models")
500
  async def get_models() -> List[Dict]:
@@ -511,22 +286,31 @@ async def get_models() -> List[Dict]:
511
  description = item.select_one("p.max-w-lg")
512
  sizes = [el.get_text(strip=True) for el in item.select("[x-test-size]")]
513
  pulls = item.select_one("[x-test-pull-count]")
514
- tags = [t.get_text(strip=True) for t in item.select('span[class*="text-blue-600"]')]
 
 
515
  updated = item.select_one("[x-test-updated]")
516
  link = item.select_one("a")
517
 
518
- models.append({
519
- "name": name.get_text(strip=True) if name else "",
520
- "description": description.get_text(strip=True) if description else "No description",
521
- "sizes": sizes,
522
- "pulls": pulls.get_text(strip=True) if pulls else "Unknown",
523
- "tags": tags,
524
- "updated": updated.get_text(strip=True) if updated else "Unknown",
525
- "link": link.get("href") if link else None,
526
- })
 
 
 
 
 
 
527
 
528
  return models
529
 
 
530
  @app.post("/gen/chat/completions")
531
  async def generate_text(
532
  request: Request,
@@ -549,11 +333,11 @@ async def generate_text(
549
  )
550
 
551
  prompt_text = extract_user_text(messages)
552
-
553
  uses_tools = (
554
  "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
555
  ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
556
-
557
  long_context = is_long_context(messages)
558
  code_present = contains_code(prompt_text)
559
  math_heavy = is_math_heavy(prompt_text)
@@ -562,26 +346,26 @@ async def generate_text(
562
  code_heavy = is_code_heavy(prompt_text, code_present, long_context)
563
 
564
  score = 0
565
-
566
  if long_context:
567
  score += 3
568
-
569
  if math_heavy:
570
  score += 3
571
-
572
  if structured_task:
573
  score += 2
574
-
575
  if code_present:
576
  score += 2
577
-
578
  if multi_q:
579
  score += 1
580
-
581
  for kw in REASONING_KEYWORDS:
582
  if kw in prompt_text:
583
  score += 1
584
-
585
  chosen_model = "meta-llama/llama-4-scout-17b-16e-instruct"
586
  provider = "groq"
587
  if score > 10:
@@ -594,11 +378,11 @@ async def generate_text(
594
  provider = "groq"
595
 
596
  elif code_present:
597
-
598
  if code_heavy and score >= 6:
599
  chosen_model = "gpt-oss-120b"
600
  provider = "cerebras"
601
-
602
  elif score >= 4:
603
  chosen_model = "llama-3.3-70b-versatile"
604
  provider = "groq"
@@ -621,7 +405,8 @@ async def generate_text(
621
  await check_chat_rate_limit(request, authorization, x_client_id)
622
 
623
  body["model"] = chosen_model
624
- print(f"""
 
625
  [ADVANCED ROUTER]
626
  Score: {score}
627
  Uses tools: {uses_tools}
@@ -631,8 +416,8 @@ async def generate_text(
631
  Structured: {structured_task}
632
  Multi-question: {multi_q}
633
  → Selected: {chosen_model} ({provider})
634
- """)
635
-
636
 
637
  stream = body.get("stream", False)
638
 
@@ -660,7 +445,7 @@ async def generate_text(
660
 
661
  if stream:
662
  body["stream"] = True
663
-
664
  async def event_generator():
665
  try:
666
  async with httpx.AsyncClient(timeout=None) as client:
@@ -685,24 +470,24 @@ async def generate_text(
685
  .replace("\r", " ")
686
  )
687
  yield (
688
- "data: {\"error\": "
689
- f"\"Upstream provider error ({r.status_code}): {safe_error_payload}\""
690
  "}\n\n"
691
  )
692
  return
693
-
694
  async for line in r.aiter_lines():
695
  if line == "":
696
  yield "\n"
697
  continue
698
-
699
  yield line + "\n"
700
-
701
  except asyncio.CancelledError:
702
  return
703
  except Exception as e:
704
- yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
705
-
706
  return StreamingResponse(
707
  event_generator(),
708
  media_type="text/event-stream",
@@ -728,13 +513,11 @@ async def generate_text(
728
  "message": r.text[:1000],
729
  }
730
 
731
- return JSONResponse(
732
- status_code=r.status_code,
733
- content=payload
734
- )
735
 
736
  raise HTTPException(500, "Unknown provider routing error")
737
 
 
738
  @app.get("/gen/sfx/{prompt}")
739
  @app.post("/gen/sfx")
740
  async def gensfx(
@@ -748,7 +531,9 @@ async def gensfx(
748
  payload = await request.json()
749
  prompt = payload.get("prompt")
750
  prompt = normalize_prompt_value(prompt, "prompt")
751
- enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt")
 
 
752
  await check_audio_rate_limit(request, authorization, x_client_id)
753
  url = f"https://gen.pollinations.ai/audio/{prompt}?model=elevenmusic&key={PKEY}"
754
  async with httpx.AsyncClient(timeout=None) as client:
@@ -765,13 +550,11 @@ async def gensfx(
765
  "success": False,
766
  "error": "Upstream music/sfx generation failed",
767
  "status_code": response.status_code,
768
- "message": body_text[:1000]
769
- }
770
  )
771
- return Response(
772
- response.content,
773
- media_type="audio/mpeg"
774
- )
775
 
776
  @app.get("/gen/tts/{prompt}")
777
  @app.post("/gen/tts")
@@ -786,7 +569,9 @@ async def gensfx(
786
  payload = await request.json()
787
  prompt = payload.get("prompt")
788
  prompt = normalize_prompt_value(prompt, "prompt")
789
- enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt")
 
 
790
  await check_audio_rate_limit(request, authorization, x_client_id)
791
  url = f"https://gen.pollinations.ai/audio/{prompt}?key={PKEY3}"
792
  async with httpx.AsyncClient(timeout=None) as client:
@@ -803,13 +588,12 @@ async def gensfx(
803
  "success": False,
804
  "error": "Upstream audio generation failed",
805
  "status_code": response.status_code,
806
- "message": body_text[:1000]
807
- }
808
  )
809
- return Response(
810
- response.content,
811
- media_type="audio/mpeg"
812
- )
813
  @app.get("/gen/video/{prompt}")
814
  @app.post("/gen/video")
815
  @app.head("/gen/video")
@@ -824,25 +608,19 @@ async def genvideo_airforce(
824
  status_code=200,
825
  headers={
826
  "Y-prompt": "string — required. The text prompt used to generate the video.",
827
-
828
  "Y-ratio": "string — optional. Aspect ratio of the output video.",
829
  "Y-ratio-values": "3:2,2:3,1:1",
830
  "Y-ratio-default": "3:2",
831
-
832
  "Y-mode": "string — optional. Controls generation style.",
833
  "Y-mode-values": "normal,fun",
834
  "Y-mode-default": "normal",
835
-
836
  "Y-duration": "integer — optional. Duration in seconds (1–10).",
837
  "Y-duration-default": "5",
838
-
839
  "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
840
  "Y-image_urls-max": "2",
841
-
842
  "Y-response_format": "video/mp4",
843
-
844
- "Y-model": "grok-video"
845
- }
846
  )
847
 
848
  aspectRatio = "3:2"
@@ -863,7 +641,7 @@ async def genvideo_airforce(
863
  if ratio not in valid_ratios:
864
  raise HTTPException(
865
  status_code=400,
866
- detail=f"Invalid aspect ratio '{ratio}'. Must be one of 3:2, 2:3, or 1:1."
867
  )
868
  if ratio in ratios:
869
  aspectRatio = ratio
@@ -871,7 +649,7 @@ async def genvideo_airforce(
871
  if mode not in valid_modes:
872
  raise HTTPException(
873
  status_code=400,
874
- detail=f"Invalid mode '{mode}'. Must be 'normal' or 'fun'."
875
  )
876
  if mode in modes:
877
  inputMode = mode
@@ -889,13 +667,15 @@ async def genvideo_airforce(
889
  duration = 5
890
 
891
  prompt = normalize_prompt_value(prompt, "prompt")
892
- enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt")
 
 
893
  await check_video_rate_limit(request, authorization, x_client_id)
894
 
895
  RATIO_MAP = {
896
- "3:2": "16:9",
897
- "2:3": "9:16",
898
- "1:1": "1:1",
899
  }
900
  pollinations_ratio = RATIO_MAP.get(aspectRatio, "16:9")
901
 
@@ -908,20 +688,20 @@ async def genvideo_airforce(
908
  }
909
 
910
  temp_assets = []
911
-
912
  if image_urls:
913
  processed_urls = []
914
-
915
  for img in image_urls[:2]:
916
  if is_base64_image(img):
917
  image_id = save_base64_image(img)
918
  temp_assets.append(image_id)
919
-
920
  served_url = f"{request.base_url}asset-cdn/assets/{image_id}"
921
  processed_urls.append(served_url)
922
  else:
923
  processed_urls.append(img)
924
-
925
  params["image"] = "|".join(processed_urls)
926
 
927
  if inputMode == "fun":
@@ -933,7 +713,7 @@ async def genvideo_airforce(
933
  print(f"[VIDEO GEN] Pollinations URL: {url}")
934
  url = url + f"&key={PKEY}"
935
  resp = None
936
- try:
937
  async with httpx.AsyncClient(timeout=600) as client:
938
  resp = await client.get(url)
939
  finally:
@@ -954,7 +734,7 @@ async def genvideo_airforce(
954
  "error": "Upstream video generation failed",
955
  "status_code": resp.status_code,
956
  "message": body_text[:1000],
957
- }
958
  )
959
 
960
  if not resp.content:
@@ -969,6 +749,7 @@ async def genvideo_airforce(
969
  },
970
  )
971
 
 
972
  AIRFORCE_KEY = os.getenv("AIRFORCE")
973
  AIRFORCE_VIDEO_MODEL = "grok-imagine-video"
974
  AIRFORCE_API_URL = "https://api.airforce/v1/images/generations"
@@ -995,31 +776,24 @@ async def genvideo_airforce(
995
  return Response(
996
  status_code=200,
997
  headers={
998
-
999
  # Required field
1000
  "Y-prompt": "string — required. The text prompt used to generate the video.",
1001
-
1002
  # Optional fields
1003
  "Y-ratio": "string — optional. Aspect ratio of the output video.",
1004
  "Y-ratio-values": "3:2,2:3,1:1",
1005
  "Y-ratio-default": "3:2",
1006
-
1007
  "Y-mode": "string — optional. Controls generation style.",
1008
  "Y-mode-values": "normal,fun",
1009
  "Y-mode-default": "normal",
1010
-
1011
  "Y-duration": "integer — optional. Duration in seconds.",
1012
  "Y-duration-default": "5",
1013
-
1014
  "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
1015
  "Y-image_urls-max": "2",
1016
-
1017
  # Response format
1018
  "Y-response_format": "video/mp4",
1019
-
1020
  # Model info
1021
- "Y-model": "grok-imagine-video"
1022
- }
1023
  )
1024
 
1025
  aspectRatio = "3:2"
@@ -1039,7 +813,7 @@ async def genvideo_airforce(
1039
  if ratio not in valid_ratios:
1040
  raise HTTPException(
1041
  status_code=400,
1042
- detail=f"Invalid aspect ratio {ratio}. Must be one of 3:2, 2:3, or 1:1. Default is 3:2"
1043
  )
1044
  if ratio in ratios:
1045
  aspectRatio = ratio
@@ -1047,7 +821,7 @@ async def genvideo_airforce(
1047
  if mode not in valid_modes:
1048
  raise HTTPException(
1049
  status_code=400,
1050
- detail=f"Invalid mode {mode}. Must be 'normal' or 'fun'. Default is normal"
1051
  )
1052
  if mode in modes:
1053
  inputMode = mode
@@ -1060,7 +834,9 @@ async def genvideo_airforce(
1060
  raise HTTPException(400, "You may provide at most two image URLs")
1061
 
1062
  prompt = normalize_prompt_value(prompt, "prompt")
1063
- enforce_prompt_size(prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt")
 
 
1064
  await check_video_rate_limit(request, authorization, x_client_id)
1065
 
1066
  payload = {
@@ -1071,7 +847,7 @@ async def genvideo_airforce(
1071
  "response_format": "b64_json",
1072
  "sse": False,
1073
  "mode": inputMode,
1074
- "aspectRatio": aspectRatio
1075
  }
1076
 
1077
  if image_urls:
@@ -1082,9 +858,9 @@ async def genvideo_airforce(
1082
  AIRFORCE_API_URL,
1083
  headers={
1084
  "Authorization": f"Bearer {AIRFORCE_KEY}",
1085
- "Content-Type": "application/json"
1086
  },
1087
- json=payload
1088
  )
1089
 
1090
  if resp.status_code != 200:
@@ -1113,6 +889,7 @@ async def genvideo_airforce(
1113
  },
1114
  )
1115
 
 
1116
  @app.get("/subscription")
1117
  async def get_subscription(authorization: Optional[str] = Header(None)):
1118
  if not authorization or not authorization.startswith("Bearer "):
@@ -1136,7 +913,9 @@ async def get_usage(
1136
  authorization: Optional[str] = Header(None),
1137
  x_client_id: Optional[str] = Header(None),
1138
  ):
1139
- plan_key, subject = await resolve_rate_limit_identity(request, authorization, x_client_id)
 
 
1140
  plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
1141
  usage = get_usage_snapshot_for_subject(plan_key, subject)
1142
  return JSONResponse(
@@ -1149,6 +928,7 @@ async def get_usage(
1149
  },
1150
  )
1151
 
 
1152
  @app.get("/tier-config")
1153
  async def tier_config():
1154
  plans = []
@@ -1175,6 +955,7 @@ async def tier_config():
1175
  },
1176
  )
1177
 
 
1178
  @app.get("/tiers")
1179
  async def tiers():
1180
  paid_plans = []
@@ -1199,6 +980,7 @@ async def tiers():
1199
  content=paid_plans,
1200
  )
1201
 
 
1202
  @app.get("/portal")
1203
  @app.post("/portal")
1204
  async def redirect_to_protal(request: Request):
@@ -1217,8 +999,7 @@ async def redirect_to_protal(request: Request):
1217
  return RedirectResponse(url=base_url, status_code=status.HTTP_302_FOUND)
1218
  if request.method != "POST":
1219
  return RedirectResponse(
1220
- url=f"{base_url}?prefilled_email={email}",
1221
- status_code=status.HTTP_302_FOUND
1222
  )
1223
  else:
1224
- return JSONResponse({"redirect_url": (base_url + "?prefilled_email=" + email)})
 
3
  import hashlib
4
  from fastapi import FastAPI, Request, HTTPException, status, Header
5
  from fastapi.middleware.cors import CORSMiddleware
6
+ from fastapi.responses import (
7
+ Response,
8
+ JSONResponse,
9
+ StreamingResponse,
10
+ RedirectResponse,
11
+ )
12
  import httpx
13
  from bs4 import BeautifulSoup
14
  from typing import List, Dict, Any
 
16
  import re
17
  from random import randint
18
  from urllib.parse import quote
 
19
  import base64
20
+ from helper.subscriptions import (
21
+ fetch_subscription,
22
+ normalize_plan_key,
23
+ TIER_CONFIG,
24
+ PLAN_ORDER,
25
+ )
26
  from typing import Optional
27
+ from helper.keywords import *
28
+ from helper.assets import (
29
+ save_base64_image,
30
+ cleanup_image,
31
+ is_base64_image,
32
+ asset_router,
33
+ )
34
+ from helper.misc import (
35
+ extract_user_text,
36
+ calculate_messages_size,
37
+ is_long_context,
38
+ contains_code,
39
+ is_math_heavy,
40
+ is_structured_task,
41
+ multiple_questions,
42
+ is_code_heavy,
43
+ normalize_prompt_value,
44
+ enforce_prompt_size,
45
+ resolve_bound_subject,
46
+ get_usage_snapshot_for_subject,
47
+ )
48
+
49
+ from helper.ratelimit import (
50
+ enforce_rate_limit,
51
+ resolve_rate_limit_identity,
52
+ check_audio_rate_limit,
53
+ check_video_rate_limit,
54
+ check_image_rate_limit,
55
+ MAX_CHAT_PROMPT_BYTES,
56
+ MAX_CHAT_PROMPT_CHARS,
57
+ MAX_GROQ_PROMPT_BYTES,
58
+ MAX_GROQ_PROMPT_CHARS,
59
+ MAX_MEDIA_PROMPT_BYTES,
60
+ MAX_MEDIA_PROMPT_CHARS,
61
+ )
62
 
63
  app = FastAPI()
64
 
 
69
  allow_headers=["*"],
70
  )
71
  app.include_router(asset_router)
 
 
 
72
 
 
73
 
74
+ @app.get("/")
75
+ async def reroute_to_home():
76
+ return RedirectResponse(
77
+ url="https://inference.js.org", status_code=status.HTTP_308_PERMANENT_REDIRECT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
+ OLLAMA_LIBRARY_URL = "https://ollama.com/library"
 
 
 
 
 
82
 
83
 
84
  def is_complex_reasoning(prompt: str) -> bool:
 
109
  return True
110
  return False
111
 
112
+ PKEY = os.getenv("POLLINATIONS_KEY", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  PKEY2 = os.getenv("POLLINATIONS2_KEY", "")
114
  PKEY3 = os.getenv("POLLINATIONS3_KEY", "")
115
 
 
137
  "zai-glm-4.7",
138
  ]
139
 
140
+
141
  async def check_chat_rate_limit(
142
  request: Request,
143
  authorization: Optional[str],
 
145
  ):
146
  return await enforce_rate_limit(request, authorization, "cloudChatDaily", client_id)
147
 
148
+
149
  @app.head("/status/sfx")
150
  async def head_sfx():
151
  return Response(
 
153
  headers={
154
  "Content-Type": "audio/mpeg",
155
  "Accept-Ranges": "bytes",
156
+ },
157
  )
158
 
159
+
160
  @app.head("/status/image")
161
  async def head_image():
162
  return Response(
 
164
  headers={
165
  "Content-Type": "image/jpeg",
166
  "Accept-Ranges": "bytes",
167
+ },
168
  )
169
 
170
+
171
  @app.head("/status/video")
172
  async def head_video():
173
  return Response(
 
175
  headers={
176
  "Content-Type": "video/mp4",
177
  "Accept-Ranges": "bytes",
178
+ },
179
  )
180
 
181
+
182
  @app.head("/status/text")
183
  async def head_text():
184
  return Response(
 
189
  },
190
  )
191
 
192
+
193
  @app.get("/status")
194
  async def get_status():
195
  notify = ""
196
  services = {
197
+ "Video Generation": {"code": 200, "state": "ok", "message": "Running normally"},
198
+ "Image Generation": {"code": 200, "state": "ok", "message": "Running normally"},
 
 
 
 
 
 
 
 
199
  "Lightning-Text v2": {
200
  "code": 200,
201
  "state": "ok",
202
+ "message": "Running normally",
203
  },
204
  "Music/SFX Generation": {
205
  "code": 200,
206
  "state": "ok",
207
+ "message": "Running normally",
208
+ },
209
  }
210
 
211
  overall_state = (
212
+ "ok" if all(s["state"] == "ok" for s in services.values()) else "degraded"
 
213
  )
214
 
215
  return JSONResponse(
 
218
  "state": overall_state,
219
  "services": services,
220
  "notifications": notify,
221
+ "latest": "2.4.0",
222
+ },
223
  )
224
 
225
+
226
  @app.post("/gen/image")
227
  @app.get("/genimg/{prompt}")
228
  async def generate_image(
 
237
  payload = await request.json()
238
  prompt = payload.get("prompt")
239
  prompt = normalize_prompt_value(prompt, "prompt")
240
+ enforce_prompt_size(
241
+ prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Image prompt"
242
+ )
243
 
244
  await check_image_rate_limit(request, authorization, x_client_id)
245
 
 
259
  print(f"[IMAGE GEN] Routing to model: {chosen_model}")
260
 
261
  url = f"https://gen.pollinations.ai/image/{quote(prompt, safe='')}?model={chosen_model}&key={PKEY2}"
262
+ async with httpx.AsyncClient(timeout=timeout) as client:
263
  response = await client.get(url)
264
 
265
  if response.status_code != 200:
266
  raise HTTPException(
267
+ status_code=500, detail=f"Pollinations error: {response.status_code}"
 
268
  )
269
 
270
+ return Response(content=response.content, media_type="image/jpeg")
271
+
272
+
 
273
  @app.head("/models")
274
  @app.get("/models")
275
  async def get_models() -> List[Dict]:
 
286
  description = item.select_one("p.max-w-lg")
287
  sizes = [el.get_text(strip=True) for el in item.select("[x-test-size]")]
288
  pulls = item.select_one("[x-test-pull-count]")
289
+ tags = [
290
+ t.get_text(strip=True) for t in item.select('span[class*="text-blue-600"]')
291
+ ]
292
  updated = item.select_one("[x-test-updated]")
293
  link = item.select_one("a")
294
 
295
+ models.append(
296
+ {
297
+ "name": name.get_text(strip=True) if name else "",
298
+ "description": (
299
+ description.get_text(strip=True)
300
+ if description
301
+ else "No description"
302
+ ),
303
+ "sizes": sizes,
304
+ "pulls": pulls.get_text(strip=True) if pulls else "Unknown",
305
+ "tags": tags,
306
+ "updated": updated.get_text(strip=True) if updated else "Unknown",
307
+ "link": link.get("href") if link else None,
308
+ }
309
+ )
310
 
311
  return models
312
 
313
+
314
  @app.post("/gen/chat/completions")
315
  async def generate_text(
316
  request: Request,
 
333
  )
334
 
335
  prompt_text = extract_user_text(messages)
336
+
337
  uses_tools = (
338
  "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
339
  ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
340
+
341
  long_context = is_long_context(messages)
342
  code_present = contains_code(prompt_text)
343
  math_heavy = is_math_heavy(prompt_text)
 
346
  code_heavy = is_code_heavy(prompt_text, code_present, long_context)
347
 
348
  score = 0
349
+
350
  if long_context:
351
  score += 3
352
+
353
  if math_heavy:
354
  score += 3
355
+
356
  if structured_task:
357
  score += 2
358
+
359
  if code_present:
360
  score += 2
361
+
362
  if multi_q:
363
  score += 1
364
+
365
  for kw in REASONING_KEYWORDS:
366
  if kw in prompt_text:
367
  score += 1
368
+
369
  chosen_model = "meta-llama/llama-4-scout-17b-16e-instruct"
370
  provider = "groq"
371
  if score > 10:
 
378
  provider = "groq"
379
 
380
  elif code_present:
381
+
382
  if code_heavy and score >= 6:
383
  chosen_model = "gpt-oss-120b"
384
  provider = "cerebras"
385
+
386
  elif score >= 4:
387
  chosen_model = "llama-3.3-70b-versatile"
388
  provider = "groq"
 
405
  await check_chat_rate_limit(request, authorization, x_client_id)
406
 
407
  body["model"] = chosen_model
408
+ print(
409
+ f"""
410
  [ADVANCED ROUTER]
411
  Score: {score}
412
  Uses tools: {uses_tools}
 
416
  Structured: {structured_task}
417
  Multi-question: {multi_q}
418
  → Selected: {chosen_model} ({provider})
419
+ """
420
+ )
421
 
422
  stream = body.get("stream", False)
423
 
 
445
 
446
  if stream:
447
  body["stream"] = True
448
+
449
  async def event_generator():
450
  try:
451
  async with httpx.AsyncClient(timeout=None) as client:
 
470
  .replace("\r", " ")
471
  )
472
  yield (
473
+ 'data: {"error": '
474
+ f'"Upstream provider error ({r.status_code}): {safe_error_payload}"'
475
  "}\n\n"
476
  )
477
  return
478
+
479
  async for line in r.aiter_lines():
480
  if line == "":
481
  yield "\n"
482
  continue
483
+
484
  yield line + "\n"
485
+
486
  except asyncio.CancelledError:
487
  return
488
  except Exception as e:
489
+ yield f'data: {{"error": "{str(e)}"}}\n\n'
490
+
491
  return StreamingResponse(
492
  event_generator(),
493
  media_type="text/event-stream",
 
513
  "message": r.text[:1000],
514
  }
515
 
516
+ return JSONResponse(status_code=r.status_code, content=payload)
 
 
 
517
 
518
  raise HTTPException(500, "Unknown provider routing error")
519
 
520
+
521
  @app.get("/gen/sfx/{prompt}")
522
  @app.post("/gen/sfx")
523
  async def gensfx(
 
531
  payload = await request.json()
532
  prompt = payload.get("prompt")
533
  prompt = normalize_prompt_value(prompt, "prompt")
534
+ enforce_prompt_size(
535
+ prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt"
536
+ )
537
  await check_audio_rate_limit(request, authorization, x_client_id)
538
  url = f"https://gen.pollinations.ai/audio/{prompt}?model=elevenmusic&key={PKEY}"
539
  async with httpx.AsyncClient(timeout=None) as client:
 
550
  "success": False,
551
  "error": "Upstream music/sfx generation failed",
552
  "status_code": response.status_code,
553
+ "message": body_text[:1000],
554
+ },
555
  )
556
+ return Response(response.content, media_type="audio/mpeg")
557
+
 
 
558
 
559
  @app.get("/gen/tts/{prompt}")
560
  @app.post("/gen/tts")
 
569
  payload = await request.json()
570
  prompt = payload.get("prompt")
571
  prompt = normalize_prompt_value(prompt, "prompt")
572
+ enforce_prompt_size(
573
+ prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Audio prompt"
574
+ )
575
  await check_audio_rate_limit(request, authorization, x_client_id)
576
  url = f"https://gen.pollinations.ai/audio/{prompt}?key={PKEY3}"
577
  async with httpx.AsyncClient(timeout=None) as client:
 
588
  "success": False,
589
  "error": "Upstream audio generation failed",
590
  "status_code": response.status_code,
591
+ "message": body_text[:1000],
592
+ },
593
  )
594
+ return Response(response.content, media_type="audio/mpeg")
595
+
596
+
 
597
  @app.get("/gen/video/{prompt}")
598
  @app.post("/gen/video")
599
  @app.head("/gen/video")
 
608
  status_code=200,
609
  headers={
610
  "Y-prompt": "string — required. The text prompt used to generate the video.",
 
611
  "Y-ratio": "string — optional. Aspect ratio of the output video.",
612
  "Y-ratio-values": "3:2,2:3,1:1",
613
  "Y-ratio-default": "3:2",
 
614
  "Y-mode": "string — optional. Controls generation style.",
615
  "Y-mode-values": "normal,fun",
616
  "Y-mode-default": "normal",
 
617
  "Y-duration": "integer — optional. Duration in seconds (1–10).",
618
  "Y-duration-default": "5",
 
619
  "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
620
  "Y-image_urls-max": "2",
 
621
  "Y-response_format": "video/mp4",
622
+ "Y-model": "grok-video",
623
+ },
 
624
  )
625
 
626
  aspectRatio = "3:2"
 
641
  if ratio not in valid_ratios:
642
  raise HTTPException(
643
  status_code=400,
644
+ detail=f"Invalid aspect ratio '{ratio}'. Must be one of 3:2, 2:3, or 1:1.",
645
  )
646
  if ratio in ratios:
647
  aspectRatio = ratio
 
649
  if mode not in valid_modes:
650
  raise HTTPException(
651
  status_code=400,
652
+ detail=f"Invalid mode '{mode}'. Must be 'normal' or 'fun'.",
653
  )
654
  if mode in modes:
655
  inputMode = mode
 
667
  duration = 5
668
 
669
  prompt = normalize_prompt_value(prompt, "prompt")
670
+ enforce_prompt_size(
671
+ prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt"
672
+ )
673
  await check_video_rate_limit(request, authorization, x_client_id)
674
 
675
  RATIO_MAP = {
676
+ "3:2": "16:9",
677
+ "2:3": "9:16",
678
+ "1:1": "1:1",
679
  }
680
  pollinations_ratio = RATIO_MAP.get(aspectRatio, "16:9")
681
 
 
688
  }
689
 
690
  temp_assets = []
691
+
692
  if image_urls:
693
  processed_urls = []
694
+
695
  for img in image_urls[:2]:
696
  if is_base64_image(img):
697
  image_id = save_base64_image(img)
698
  temp_assets.append(image_id)
699
+
700
  served_url = f"{request.base_url}asset-cdn/assets/{image_id}"
701
  processed_urls.append(served_url)
702
  else:
703
  processed_urls.append(img)
704
+
705
  params["image"] = "|".join(processed_urls)
706
 
707
  if inputMode == "fun":
 
713
  print(f"[VIDEO GEN] Pollinations URL: {url}")
714
  url = url + f"&key={PKEY}"
715
  resp = None
716
+ try:
717
  async with httpx.AsyncClient(timeout=600) as client:
718
  resp = await client.get(url)
719
  finally:
 
734
  "error": "Upstream video generation failed",
735
  "status_code": resp.status_code,
736
  "message": body_text[:1000],
737
+ },
738
  )
739
 
740
  if not resp.content:
 
749
  },
750
  )
751
 
752
+
753
  AIRFORCE_KEY = os.getenv("AIRFORCE")
754
  AIRFORCE_VIDEO_MODEL = "grok-imagine-video"
755
  AIRFORCE_API_URL = "https://api.airforce/v1/images/generations"
 
776
  return Response(
777
  status_code=200,
778
  headers={
 
779
  # Required field
780
  "Y-prompt": "string — required. The text prompt used to generate the video.",
 
781
  # Optional fields
782
  "Y-ratio": "string — optional. Aspect ratio of the output video.",
783
  "Y-ratio-values": "3:2,2:3,1:1",
784
  "Y-ratio-default": "3:2",
 
785
  "Y-mode": "string — optional. Controls generation style.",
786
  "Y-mode-values": "normal,fun",
787
  "Y-mode-default": "normal",
 
788
  "Y-duration": "integer — optional. Duration in seconds.",
789
  "Y-duration-default": "5",
 
790
  "Y-image_urls": "array<string> — optional. Up to 2 image URLs for conditioning.",
791
  "Y-image_urls-max": "2",
 
792
  # Response format
793
  "Y-response_format": "video/mp4",
 
794
  # Model info
795
+ "Y-model": "grok-imagine-video",
796
+ },
797
  )
798
 
799
  aspectRatio = "3:2"
 
813
  if ratio not in valid_ratios:
814
  raise HTTPException(
815
  status_code=400,
816
+ detail=f"Invalid aspect ratio {ratio}. Must be one of 3:2, 2:3, or 1:1. Default is 3:2",
817
  )
818
  if ratio in ratios:
819
  aspectRatio = ratio
 
821
  if mode not in valid_modes:
822
  raise HTTPException(
823
  status_code=400,
824
+ detail=f"Invalid mode {mode}. Must be 'normal' or 'fun'. Default is normal",
825
  )
826
  if mode in modes:
827
  inputMode = mode
 
834
  raise HTTPException(400, "You may provide at most two image URLs")
835
 
836
  prompt = normalize_prompt_value(prompt, "prompt")
837
+ enforce_prompt_size(
838
+ prompt, MAX_MEDIA_PROMPT_CHARS, MAX_MEDIA_PROMPT_BYTES, "Video prompt"
839
+ )
840
  await check_video_rate_limit(request, authorization, x_client_id)
841
 
842
  payload = {
 
847
  "response_format": "b64_json",
848
  "sse": False,
849
  "mode": inputMode,
850
+ "aspectRatio": aspectRatio,
851
  }
852
 
853
  if image_urls:
 
858
  AIRFORCE_API_URL,
859
  headers={
860
  "Authorization": f"Bearer {AIRFORCE_KEY}",
861
+ "Content-Type": "application/json",
862
  },
863
+ json=payload,
864
  )
865
 
866
  if resp.status_code != 200:
 
889
  },
890
  )
891
 
892
+
893
  @app.get("/subscription")
894
  async def get_subscription(authorization: Optional[str] = Header(None)):
895
  if not authorization or not authorization.startswith("Bearer "):
 
913
  authorization: Optional[str] = Header(None),
914
  x_client_id: Optional[str] = Header(None),
915
  ):
916
+ plan_key, subject = await resolve_rate_limit_identity(
917
+ request, authorization, x_client_id
918
+ )
919
  plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
920
  usage = get_usage_snapshot_for_subject(plan_key, subject)
921
  return JSONResponse(
 
928
  },
929
  )
930
 
931
+
932
  @app.get("/tier-config")
933
  async def tier_config():
934
  plans = []
 
955
  },
956
  )
957
 
958
+
959
  @app.get("/tiers")
960
  async def tiers():
961
  paid_plans = []
 
980
  content=paid_plans,
981
  )
982
 
983
+
984
  @app.get("/portal")
985
  @app.post("/portal")
986
  async def redirect_to_protal(request: Request):
 
999
  return RedirectResponse(url=base_url, status_code=status.HTTP_302_FOUND)
1000
  if request.method != "POST":
1001
  return RedirectResponse(
1002
+ url=f"{base_url}?prefilled_email={email}", status_code=status.HTTP_302_FOUND
 
1003
  )
1004
  else:
1005
+ return JSONResponse({"redirect_url": (base_url + "?prefilled_email=" + email)})
assets.py → helper/assets.py RENAMED
File without changes
keywords.py → helper/keywords.py RENAMED
File without changes
helper/misc.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import asyncio
3
+
4
+ from helper.subscriptions import USAGE_PERIODS, usage_locks, usage_store, TIER_CONFIG, client_subject_bindings, CLIENT_BIND_TTL_SECONDS, MAX_CLIENT_ID_LENGTH
5
+ from fastapi import Request, HTTPException
6
+ from typing import Optional, Dict, Any, List
7
+ import re
8
+ import hashlib
9
+
10
+ def extract_user_text(messages: list) -> str:
11
+ return " ".join(
12
+ message_content_to_text(m.get("content"))
13
+ for m in messages
14
+ if isinstance(m, dict) and m.get("role") == "user"
15
+ ).lower()
16
+
17
+ def get_usage_period_key(metric: str) -> str:
18
+ now = time.gmtime()
19
+ period = USAGE_PERIODS.get(metric, "daily")
20
+ if period == "weekly":
21
+ iso_year, iso_week, _ = time.strftime("%G %V %u", now).split(" ")
22
+ return f"{iso_year}-W{iso_week}"
23
+ return time.strftime("%Y-%m-%d", now)
24
+
25
+
26
+ def sanitize_client_id(raw_client_id: Optional[str]) -> Optional[str]:
27
+ if not isinstance(raw_client_id, str):
28
+ return None
29
+ trimmed = raw_client_id.strip()
30
+ if not trimmed or len(trimmed) > MAX_CLIENT_ID_LENGTH:
31
+ return None
32
+ if not re.match(r"^[A-Za-z0-9._:-]+$", trimmed):
33
+ return None
34
+ return trimmed
35
+
36
+
37
+ def get_usage_lock(metric: str, subject: str) -> asyncio.Lock:
38
+ metric_locks = usage_locks.get(metric)
39
+ if metric_locks is None:
40
+ metric_locks = {}
41
+ usage_locks[metric] = metric_locks
42
+ lock = metric_locks.get(subject)
43
+ if lock is None:
44
+ lock = asyncio.Lock()
45
+ metric_locks[subject] = lock
46
+ return lock
47
+
48
+
49
+ def build_default_subject(request: Request, client_id: Optional[str]) -> str:
50
+ if client_id:
51
+ client_hash = hashlib.sha256(client_id.encode("utf-8")).hexdigest()[:24]
52
+ return f"client:{client_hash}"
53
+ host = request.client.host if request.client else "unknown"
54
+ user_agent = request.headers.get("user-agent", "")
55
+ ua_hash = (
56
+ hashlib.sha256(user_agent.encode("utf-8")).hexdigest()[:12]
57
+ if user_agent
58
+ else "noua"
59
+ )
60
+ return f"anon:{host}:{ua_hash}"
61
+
62
+
63
+ def bind_client_subject(client_id: Optional[str], subject: str, plan_key: str):
64
+ if not client_id:
65
+ return
66
+ client_subject_bindings[client_id] = {
67
+ "subject": subject,
68
+ "plan_key": plan_key,
69
+ "expires_at": time.time() + CLIENT_BIND_TTL_SECONDS,
70
+ }
71
+
72
+
73
+ def resolve_bound_subject(client_id: Optional[str], fallback_subject: str) -> str:
74
+ if not client_id:
75
+ return fallback_subject
76
+ bound = client_subject_bindings.get(client_id)
77
+ if not bound:
78
+ return fallback_subject
79
+ if bound.get("expires_at", 0) <= time.time():
80
+ client_subject_bindings.pop(client_id, None)
81
+ return fallback_subject
82
+ return bound.get("subject", fallback_subject)
83
+
84
+
85
+ def normalize_prompt_value(prompt: Optional[str], field_name: str = "prompt") -> str:
86
+ if not isinstance(prompt, str):
87
+ raise HTTPException(status_code=400, detail=f"{field_name} is required")
88
+ normalized = prompt.strip()
89
+ if not normalized:
90
+ raise HTTPException(status_code=400, detail=f"{field_name} is required")
91
+ return normalized
92
+
93
+
94
+ def enforce_prompt_size(prompt: str, max_chars: int, max_bytes: int, context: str):
95
+ char_len = len(prompt)
96
+ byte_len = len(prompt.encode("utf-8"))
97
+ if char_len > max_chars or byte_len > max_bytes:
98
+ raise HTTPException(
99
+ status_code=413,
100
+ detail=(
101
+ f"{context} is too large ({char_len} chars, {byte_len} bytes). "
102
+ f"Max allowed is {max_chars} chars or {max_bytes} bytes."
103
+ ),
104
+ )
105
+
106
+
107
+ def message_content_to_text(content: Any) -> str:
108
+ if isinstance(content, str):
109
+ return content
110
+ if isinstance(content, list):
111
+ parts: List[str] = []
112
+ for item in content:
113
+ if isinstance(item, str):
114
+ parts.append(item)
115
+ continue
116
+ if isinstance(item, dict):
117
+ text = item.get("text")
118
+ if isinstance(text, str):
119
+ parts.append(text)
120
+ return " ".join(parts)
121
+ return ""
122
+
123
+
124
+ def calculate_messages_size(messages: list) -> tuple[int, int]:
125
+ total_chars = 0
126
+ total_bytes = 0
127
+ for message in messages:
128
+ if not isinstance(message, dict):
129
+ continue
130
+ text = message_content_to_text(message.get("content"))
131
+ if not text:
132
+ continue
133
+ total_chars += len(text)
134
+ total_bytes += len(text.encode("utf-8"))
135
+ return total_chars, total_bytes
136
+
137
+
138
+ def get_usage_snapshot_for_subject(plan_key: str, subject: str) -> Dict[str, Dict[str, Any]]:
139
+ plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
140
+ plan_limits = plan.get("limits", {})
141
+ snapshot: Dict[str, Dict[str, Any]] = {}
142
+ for metric in usage_store.keys():
143
+ limit = plan_limits.get(metric)
144
+ window_key = get_usage_period_key(metric)
145
+ entry = usage_store[metric].get(subject)
146
+ used = 0
147
+ if entry and entry.get("window") == window_key:
148
+ used = max(0, int(entry.get("count", 0)))
149
+ remaining = None if limit is None else max(0, int(limit) - used)
150
+ snapshot[metric] = {
151
+ "limit": limit,
152
+ "used": used,
153
+ "remaining": remaining,
154
+ "window": window_key,
155
+ "period": USAGE_PERIODS.get(metric, "daily"),
156
+ }
157
+ return snapshot
helper/ratelimit.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Optional, Dict
3
+ from fastapi import HTTPException, Request
4
+ from helper.misc import sanitize_client_id, get_usage_lock, get_usage_period_key, build_default_subject, bind_client_subject, resolve_bound_subject
5
+ from helper.subscriptions import fetch_subscription, usage_store, normalize_plan_key, TIER_CONFIG
6
+ import os
7
+ IDENTITY_CACHE_TTL_SECONDS = 60
8
+ identity_cache = {}
9
+ CLIENT_BIND_TTL_SECONDS = int(
10
+ os.getenv("CLIENT_BIND_TTL_SECONDS", str(8 * 24 * 60 * 60))
11
+ )
12
+ MAX_CLIENT_ID_LENGTH = 128
13
+ client_subject_bindings = {}
14
+
15
+ MAX_CHAT_PROMPT_CHARS = int(os.getenv("MAX_CHAT_PROMPT_CHARS", "120000"))
16
+ MAX_CHAT_PROMPT_BYTES = int(os.getenv("MAX_CHAT_PROMPT_BYTES", "500000"))
17
+ MAX_GROQ_PROMPT_CHARS = int(os.getenv("MAX_GROQ_PROMPT_CHARS", "90000"))
18
+ MAX_GROQ_PROMPT_BYTES = int(os.getenv("MAX_GROQ_PROMPT_BYTES", "350000"))
19
+ MAX_MEDIA_PROMPT_CHARS = int(os.getenv("MAX_MEDIA_PROMPT_CHARS", "4000"))
20
+ MAX_MEDIA_PROMPT_BYTES = int(os.getenv("MAX_MEDIA_PROMPT_BYTES", "16000"))
21
+
22
+ async def resolve_rate_limit_identity(
23
+ request: Request,
24
+ authorization: Optional[str],
25
+ client_id: Optional[str] = None,
26
+ ) -> tuple[str, str]:
27
+ now = time.time()
28
+ normalized_client_id = sanitize_client_id(client_id)
29
+ default_subject = build_default_subject(request, normalized_client_id)
30
+ if not authorization or not authorization.startswith("Bearer "):
31
+ return "free", resolve_bound_subject(normalized_client_id, default_subject)
32
+
33
+ token = authorization.split(" ", 1)[1].strip()
34
+ if not token:
35
+ return "free", resolve_bound_subject(normalized_client_id, default_subject)
36
+
37
+ cached = identity_cache.get(token)
38
+ if cached and cached.get("expires_at", 0) > now:
39
+ plan_key = cached.get("plan_key", "free")
40
+ subject = cached.get("subject", default_subject)
41
+ bind_client_subject(normalized_client_id, subject, plan_key)
42
+ return plan_key, subject
43
+
44
+ try:
45
+ sub = await fetch_subscription(token)
46
+ except Exception:
47
+ return "free", resolve_bound_subject(normalized_client_id, default_subject)
48
+
49
+ if not isinstance(sub, dict) or sub.get("error"):
50
+ return "free", resolve_bound_subject(normalized_client_id, default_subject)
51
+
52
+ email = sub.get("email")
53
+ if isinstance(email, str) and email.strip():
54
+ subject = f"user:{email.strip().lower()}"
55
+ else:
56
+ subject = default_subject
57
+
58
+ plan_key = normalize_plan_key(sub.get("plan_key"))
59
+ identity_cache[token] = {
60
+ "plan_key": plan_key,
61
+ "subject": subject,
62
+ "expires_at": now + IDENTITY_CACHE_TTL_SECONDS,
63
+ }
64
+ bind_client_subject(normalized_client_id, subject, plan_key)
65
+ return plan_key, subject
66
+
67
+
68
+ async def enforce_rate_limit(
69
+ request: Request,
70
+ authorization: Optional[str],
71
+ metric: str,
72
+ client_id: Optional[str] = None,
73
+ ) -> Dict[str, Optional[int | str]]:
74
+ if metric not in usage_store:
75
+ raise HTTPException(status_code=500, detail=f"Unknown limit metric: {metric}")
76
+
77
+ plan_key, subject = await resolve_rate_limit_identity(
78
+ request, authorization, client_id
79
+ )
80
+ plan = TIER_CONFIG.get(plan_key) or TIER_CONFIG["free"]
81
+ plan_limits = plan.get("limits", {})
82
+ limit = plan_limits.get(metric)
83
+
84
+ window_key = get_usage_period_key(metric)
85
+ lock = get_usage_lock(metric, subject)
86
+ async with lock:
87
+ bucket = usage_store[metric]
88
+ entry = bucket.get(subject)
89
+ if not entry or entry.get("window") != window_key:
90
+ entry = {"window": window_key, "count": 0}
91
+ bucket[subject] = entry
92
+
93
+ if limit is not None and entry["count"] >= int(limit):
94
+ raise HTTPException(
95
+ status_code=429,
96
+ detail=f"{metric} limit reached for {plan.get('name', 'current plan')}",
97
+ )
98
+
99
+ entry["count"] += 1
100
+ remaining = None if limit is None else max(0, int(limit) - entry["count"])
101
+ return {
102
+ "plan_key": plan_key,
103
+ "remaining": remaining,
104
+ "used": entry["count"],
105
+ "window": window_key,
106
+ }
107
+
108
+
109
+ async def check_audio_rate_limit(
110
+ request: Request,
111
+ authorization: Optional[str],
112
+ client_id: Optional[str] = None,
113
+ ):
114
+ await enforce_rate_limit(request, authorization, "audioWeekly", client_id)
115
+
116
+
117
+ async def check_image_rate_limit(
118
+ request: Request,
119
+ authorization: Optional[str],
120
+ client_id: Optional[str] = None,
121
+ ):
122
+ await enforce_rate_limit(request, authorization, "imagesDaily", client_id)
123
+
124
+
125
+ async def check_video_rate_limit(
126
+ request: Request,
127
+ authorization: Optional[str],
128
+ client_id: Optional[str] = None,
129
+ ):
130
+ await enforce_rate_limit(request, authorization, "videosDaily", client_id)
131
+
subscriptions.py → helper/subscriptions.py RENAMED
File without changes