""" ══════════════════════════════════════════════════════════════════ ⚡ DevsDo API Server v1.0.0 OpenAI-compatible · 52 Models · Cloudflare AI Backend SSE Streaming · Reasoning · Zero API Keys Sections ──────── §1 Logging §2 Model Registry (g4f-style) §3 Register All 52 Models §4 Think-Tag Stream Parser §5 Backend Client (SSE → raw tokens) §6 FastAPI App + Lifespan §7 Pydantic Schemas §8 Routes §9 Stream Generator (tokens → OpenAI SSE) §10 Non-Stream Collector §11 Entrypoint ══════════════════════════════════════════════════════════════════ """ from __future__ import annotations import json, time, uuid, asyncio, random, logging from contextlib import asynccontextmanager from dataclasses import dataclass, asdict from typing import Optional, AsyncGenerator, Dict, List, Any import aiohttp import aiohttp.resolver from fastapi import FastAPI, HTTPException from fastapi.responses import StreamingResponse from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field # ═══════════════════════════════════════════════════════════ # §1 — LOGGING # ═══════════════════════════════════════════════════════════ logging.basicConfig( level=logging.INFO, format="%(asctime)s │ %(levelname)-7s │ %(message)s", datefmt="%H:%M:%S", ) log = logging.getLogger("devsdo") # ═══════════════════════════════════════════════════════════ # §2 — MODEL REGISTRY (g4f-style) # # Each model carries: # name – short route alias ("deepseek-r1") # real_name – human display name ("DeepSeek R1 Distill Qwen 32B") # author – organisation ("DeepSeek") # family – model family group ("DeepSeek") # model_id – backend @cf/@hf ID ("@cf/deepseek-ai/…") # ═══════════════════════════════════════════════════════════ @dataclass(frozen=True, slots=True) class ModelCard: name: str real_name: str author: str family: str model_id: str class Registry: """Central model store — register once, resolve anywhere.""" _by_name: Dict[str, ModelCard] = {} _by_id: Dict[str, ModelCard] = {} _default: str = "" # ── mutators ────────────────────────────────────── @classmethod def add(cls, *cards: ModelCard): for c in cards: cls._by_name[c.name] = c cls._by_id[c.model_id] = c if not cls._default: cls._default = c.name # ── lookups ─────────────────────────────────────── @classmethod def resolve(cls, raw: Optional[str]) -> str: """Alias / full-id / fuzzy → backend model_id.""" if not raw: return cls._by_name[cls._default].model_id raw = raw.strip() for pfx in ("devsdo/", "devsdo:", "cloudflare/", "cf/"): if raw.lower().startswith(pfx): raw = raw[len(pfx):] break if raw.startswith(("@cf/", "@hf/")): return raw if raw in cls._by_name: return cls._by_name[raw].model_id low = raw.lower() for alias, card in cls._by_name.items(): if low in alias or low in card.model_id.lower(): return card.model_id return raw # pass-through @classmethod def find(cls, raw: str) -> Optional[ModelCard]: mid = cls.resolve(raw) return cls._by_id.get(mid) or cls._by_name.get(raw) @classmethod def all_cards(cls) -> List[ModelCard]: return list(cls._by_name.values()) # ── serialisers ─────────────────────────────────── @classmethod def openai_list(cls) -> dict: """GET /v1/models — OpenAI-compatible.""" return { "object": "list", "data": [ { "id": c.name, "object": "model", "created": 1700000000, "owned_by": c.author.lower().replace(" ", "-"), } for c in cls._by_name.values() ], } @classmethod def internal_list(cls) -> dict: """GET /api/internal/v1/models — rich, grouped by family.""" fam: Dict[str, list] = {} for c in cls._by_name.values(): fam.setdefault(c.family, 
                           []).append({
                               "id": c.name,
                               "name": c.real_name,
                               "author": c.author,
                               "backend_id": c.model_id,
                           })
        return {
            "server": "DevsDo API",
            "version": "1.0.0",
            "timestamp": int(time.time()),
            "total": len(cls._by_name),
            "families": [
                {"family": fn, "count": len(ms), "models": ms}
                for fn, ms in fam.items()
            ],
        }


# ═══════════════════════════════════════════════════════════
# §3 — REGISTER ALL 54 MODELS
# ═══════════════════════════════════════════════════════════

Registry.add(
    # ─── Flagship / Large ─────────────────────────────────
    ModelCard("kimi-k2.5", "Kimi K2.5", "Moonshot AI", "Kimi", "@cf/moonshotai/kimi-k2.5"),
    ModelCard("nemotron-120b", "Nemotron 3 120B A12B", "NVIDIA", "Nemotron", "@cf/nvidia/nemotron-3-120b-a12b"),
    ModelCard("gpt-oss-120b", "GPT-OSS 120B", "OpenAI", "GPT-OSS", "@cf/openai/gpt-oss-120b"),
    ModelCard("gpt-oss-20b", "GPT-OSS 20B", "OpenAI", "GPT-OSS", "@cf/openai/gpt-oss-20b"),
    ModelCard("llama-3.3-70b", "LLaMA 3.3 70B Instruct FP8", "Meta", "LLaMA", "@cf/meta/llama-3.3-70b-instruct-fp8-fast"),

    # ─── Meta LLaMA ───────────────────────────────────────
    ModelCard("llama-4-scout", "LLaMA 4 Scout 17B 16E", "Meta", "LLaMA", "@cf/meta/llama-4-scout-17b-16e-instruct"),
    ModelCard("llama-3.2-11b-vision", "LLaMA 3.2 11B Vision", "Meta", "LLaMA", "@cf/meta/llama-3.2-11b-vision-instruct"),
    ModelCard("llama-3.1-8b", "LLaMA 3.1 8B Fast", "Meta", "LLaMA", "@cf/meta/llama-3.1-8b-instruct-fast"),
    ModelCard("llama-3.1-8b-fp8", "LLaMA 3.1 8B FP8", "Meta", "LLaMA", "@cf/meta/llama-3.1-8b-instruct-fp8"),
    ModelCard("llama-3.1-8b-awq", "LLaMA 3.1 8B AWQ", "Meta", "LLaMA", "@cf/meta/llama-3.1-8b-instruct-awq"),
    ModelCard("llama-3.2-3b", "LLaMA 3.2 3B", "Meta", "LLaMA", "@cf/meta/llama-3.2-3b-instruct"),
    ModelCard("llama-3.2-1b", "LLaMA 3.2 1B", "Meta", "LLaMA", "@cf/meta/llama-3.2-1b-instruct"),
    ModelCard("llama-3-8b", "LLaMA 3 8B", "Meta", "LLaMA", "@cf/meta/llama-3-8b-instruct"),
    ModelCard("llama-3-8b-awq", "LLaMA 3 8B AWQ", "Meta", "LLaMA", "@cf/meta/llama-3-8b-instruct-awq"),
    ModelCard("llama-guard-3", "LLaMA Guard 3 8B", "Meta", "LLaMA", "@cf/meta/llama-guard-3-8b"),
    ModelCard("llama-2-7b-fp16", "LLaMA 2 7B FP16", "Meta", "LLaMA", "@cf/meta/llama-2-7b-chat-fp16"),
    ModelCard("llama-2-7b-int8", "LLaMA 2 7B INT8", "Meta", "LLaMA", "@cf/meta/llama-2-7b-chat-int8"),
    ModelCard("llama-2-7b-lora", "LLaMA 2 7B LoRA", "Meta", "LLaMA", "@cf/meta-llama/llama-2-7b-chat-hf-lora"),
    ModelCard("llama-2-13b", "LLaMA 2 13B AWQ", "Meta", "LLaMA", "@hf/thebloke/llama-2-13b-chat-awq"),

    # ─── Qwen ─────────────────────────────────────────────
    ModelCard("qwq-32b", "QwQ 32B", "Qwen", "Qwen", "@cf/qwen/qwq-32b"),
    ModelCard("qwen-coder-32b", "Qwen 2.5 Coder 32B", "Qwen", "Qwen", "@cf/qwen/qwen2.5-coder-32b-instruct"),
    ModelCard("qwen3-30b", "Qwen 3 30B A3B FP8", "Qwen", "Qwen", "@cf/qwen/qwen3-30b-a3b-fp8"),
    ModelCard("qwen1.5-14b", "Qwen 1.5 14B AWQ", "Qwen", "Qwen", "@cf/qwen/qwen1.5-14b-chat-awq"),
    ModelCard("qwen1.5-7b", "Qwen 1.5 7B AWQ", "Qwen", "Qwen", "@cf/qwen/qwen1.5-7b-chat-awq"),
    ModelCard("qwen1.5-1.8b", "Qwen 1.5 1.8B", "Qwen", "Qwen", "@cf/qwen/qwen1.5-1.8b-chat"),
    ModelCard("qwen1.5-0.5b", "Qwen 1.5 0.5B", "Qwen", "Qwen", "@cf/qwen/qwen1.5-0.5b-chat"),

    # ─── DeepSeek ─────────────────────────────────────────
    ModelCard("deepseek-r1", "DeepSeek R1 Distill Qwen 32B", "DeepSeek", "DeepSeek", "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"),
    ModelCard("deepseek-math", "DeepSeek Math 7B", "DeepSeek", "DeepSeek", "@cf/deepseek-ai/deepseek-math-7b-instruct"),
    ModelCard("deepseek-coder-base", "DeepSeek Coder 6.7B Base", "DeepSeek", "DeepSeek", "@hf/thebloke/deepseek-coder-6.7b-base-awq"),
    ModelCard("deepseek-coder", "DeepSeek Coder 6.7B Instruct", "DeepSeek", "DeepSeek", "@hf/thebloke/deepseek-coder-6.7b-instruct-awq"),

    # ─── Google Gemma ─────────────────────────────────────
    ModelCard("gemma-3-12b", "Gemma 3 12B IT", "Google", "Gemma", "@cf/google/gemma-3-12b-it"),
    ModelCard("gemma-7b", "Gemma 7B IT", "Google", "Gemma", "@hf/google/gemma-7b-it"),
    ModelCard("gemma-2b-lora", "Gemma 2B IT LoRA", "Google", "Gemma", "@cf/google/gemma-2b-it-lora"),
    ModelCard("gemma-7b-lora", "Gemma 7B IT LoRA", "Google", "Gemma", "@cf/google/gemma-7b-it-lora"),

    # ─── Mistral ──────────────────────────────────────────
    ModelCard("mistral-small-3.1", "Mistral Small 3.1 24B", "Mistral AI", "Mistral", "@cf/mistralai/mistral-small-3.1-24b-instruct"),
    ModelCard("mistral-v0.2", "Mistral 7B v0.2", "Mistral AI", "Mistral", "@hf/mistral/mistral-7b-instruct-v0.2"),
    ModelCard("mistral-v0.2-lora", "Mistral 7B v0.2 LoRA", "Mistral AI", "Mistral", "@cf/mistral/mistral-7b-instruct-v0.2-lora"),
    ModelCard("mistral-v0.1", "Mistral 7B v0.1", "Mistral AI", "Mistral", "@cf/mistral/mistral-7b-instruct-v0.1"),
    ModelCard("mistral-v0.1-awq", "Mistral 7B v0.1 AWQ", "Mistral AI", "Mistral", "@hf/thebloke/mistral-7b-instruct-v0.1-awq"),

    # ─── IBM Granite ──────────────────────────────────────
    ModelCard("granite-4.0", "Granite 4.0 H Micro", "IBM", "Granite", "@cf/ibm-granite/granite-4.0-h-micro"),

    # ─── ZhipuAI GLM ──────────────────────────────────────
    ModelCard("glm-4.7-flash", "GLM 4.7 Flash", "ZhipuAI", "GLM", "@cf/zai-org/glm-4.7-flash"),

    # ─── AI Singapore ─────────────────────────────────────
    ModelCard("sea-lion-27b", "SEA-LION v4 27B", "AI Singapore", "SEA-LION", "@cf/aisingapore/gemma-sea-lion-v4-27b-it"),

    # ─── Community / Other ────────────────────────────────
    ModelCard("hermes-2-pro", "Hermes 2 Pro Mistral 7B", "NousResearch", "Hermes", "@hf/nousresearch/hermes-2-pro-mistral-7b"),
    ModelCard("openhermes-2.5", "OpenHermes 2.5 Mistral 7B", "NousResearch", "Hermes", "@hf/thebloke/openhermes-2.5-mistral-7b-awq"),
    ModelCard("starling-7b", "Starling LM 7B Beta", "Nexusflow", "Starling", "@hf/nexusflow/starling-lm-7b-beta"),
    ModelCard("neural-chat-7b", "Neural Chat 7B v3.1", "Intel", "Neural Chat", "@hf/thebloke/neural-chat-7b-v3-1-awq"),
    ModelCard("openchat-3.5", "OpenChat 3.5", "OpenChat", "OpenChat", "@cf/openchat/openchat-3.5-0106"),
    ModelCard("cybertron-7b", "UNA Cybertron 7B v2", "fblgit", "Cybertron", "@cf/fblgit/una-cybertron-7b-v2-bf16"),
    ModelCard("discolm-german-7b", "DiscoLM German 7B", "TheBloke", "DiscoLM", "@cf/thebloke/discolm-german-7b-v1-awq"),
    ModelCard("zephyr-7b", "Zephyr 7B Beta", "HuggingFace", "Zephyr", "@hf/thebloke/zephyr-7b-beta-awq"),
    ModelCard("falcon-7b", "Falcon 7B Instruct", "TII UAE", "Falcon", "@cf/tiiuae/falcon-7b-instruct"),
    ModelCard("tinyllama-1.1b", "TinyLlama 1.1B Chat", "TinyLlama", "TinyLlama", "@cf/tinyllama/tinyllama-1.1b-chat-v1.0"),
    ModelCard("phi-2", "Phi 2", "Microsoft", "Phi", "@cf/microsoft/phi-2"),
    ModelCard("sqlcoder", "SQLCoder 7B 2", "Defog", "SQLCoder", "@cf/defog/sqlcoder-7b-2"),
)
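# Illustrative lookups (comments only, not executed). These follow from
# Registry.resolve() in §2 together with the registrations above:
#
#   Registry.resolve("deepseek-r1")            → "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
#   Registry.resolve("devsdo/qwq-32b")         → "@cf/qwen/qwq-32b"          (prefix stripped)
#   Registry.resolve("GPT-OSS-120B")           → "@cf/openai/gpt-oss-120b"   (fuzzy, case-insensitive)
#   Registry.resolve("@cf/openai/gpt-oss-20b") → "@cf/openai/gpt-oss-20b"    (pass-through)
#   Registry.resolve(None)                     → model_id of the first card registered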
# ═══════════════════════════════════════════════════════════
# §4 — THINK-TAG STREAM PARSER
#
# Detects <think> / </think> across chunked tokens.
# Yields ("reasoning", text) or ("content", text).
# Handles tags split across multiple SSE tokens.
# ═══════════════════════════════════════════════════════════

class ThinkParser:
    __slots__ = ("thinking", "buf")

    OPEN = "<think>"    # 7 chars
    CLOSE = "</think>"  # 8 chars

    def __init__(self):
        self.thinking = False
        self.buf = ""

    # ── feed one token, get classified fragments ──────
    def feed(self, token: str) -> list[tuple[str, str]]:
        self.buf += token
        out: list[tuple[str, str]] = []
        while self.buf:
            tag = self.CLOSE if self.thinking else self.OPEN
            kind = "reasoning" if self.thinking else "content"
            idx = self.buf.find(tag)
            if idx >= 0:
                # full tag found — emit text before, flip state
                if idx > 0:
                    out.append((kind, self.buf[:idx]))
                self.buf = self.buf[idx + len(tag):]
                self.thinking = not self.thinking
                continue
            # no full tag — check for partial tag stuck at end
            held = self._partial(tag)
            if held:
                safe = self.buf[: -len(held)]
                if safe:
                    out.append((kind, safe))
                self.buf = held
            else:
                out.append((kind, self.buf))
                self.buf = ""
            break
        return out

    # ── drain remaining buffer at stream end ──────────
    def flush(self) -> list[tuple[str, str]]:
        if not self.buf:
            return []
        kind = "reasoning" if self.thinking else "content"
        r = [(kind, self.buf)]
        self.buf = ""
        return r

    # ── helper: longest suffix of buf that is a prefix of tag
    def _partial(self, tag: str) -> str:
        for i in range(min(len(tag) - 1, len(self.buf)), 0, -1):
            if self.buf[-i:] == tag[:i]:
                return self.buf[-i:]
        return ""
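# Illustrative ThinkParser trace (comments only, not executed), assuming the
# upstream model wraps its reasoning in <think> tags that may arrive split
# across tokens:
#
#   p = ThinkParser()
#   p.feed("<th")                        → []   ("<th" held back as a partial tag)
#   p.feed("ink>step one</think>Hello")  → [("reasoning", "step one"), ("content", "Hello")]
#   p.feed(" world")                     → [("content", " world")]
#   p.flush()                            → []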
# ═══════════════════════════════════════════════════════════
# §5 — BACKEND CLIENT
#
# Talks to the Cloudflare AI proxy hosted on HF Spaces.
# Parses upstream SSE and yields raw string tokens.
# Retries on transient HTTP errors.
# ═══════════════════════════════════════════════════════════

_BACKEND = "https://adarshu07-ls.hf.space"
_BACKEND_URL = f"{_BACKEND}/v1/chat/completions"

_RETRYABLE = frozenset({429, 500, 502, 503, 504, 520, 521, 522, 523, 524})
_FATAL = frozenset({400, 401, 403, 404, 405, 422})

_BE_HEADERS = {
    "Accept": "application/json",
    "Accept-Encoding": "gzip, deflate, br",
    "Content-Type": "application/json",
    "Origin": _BACKEND,
    "Referer": f"{_BACKEND}/docs",
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
}


def _parse_sse(line: str) -> tuple[str, bool]:
    """One SSE data: line → (token_text, is_done)."""
    line = line.strip()
    if not line.startswith("data:"):
        return "", False
    payload = line[5:].strip()
    if payload == "[DONE]":
        return "", True
    try:
        obj = json.loads(payload)
        if "error" in obj:
            return "", True
        delta = obj.get("choices", [{}])[0].get("delta", {})
        return delta.get("content", "") or "", False
    except (json.JSONDecodeError, KeyError, IndexError):
        return "", False


async def backend_stream(
    session: aiohttp.ClientSession,
    messages: list[dict],
    model_id: str,
    temperature: float = 0.7,
    max_tokens: int = 4096,
    timeout: int = 180,
    retries: int = 2,
) -> AsyncGenerator[str, None]:
    """POST → upstream, parse SSE, yield raw tokens."""
    body: dict = {
        "model": model_id,
        "messages": messages,
        "stream": True,
        "temperature": temperature,
    }
    if max_tokens:
        body["max_tokens"] = max_tokens

    last_err = ""
    for attempt in range(1 + retries):
        try:
            async with session.post(
                _BACKEND_URL,
                json=body,
                timeout=aiohttp.ClientTimeout(
                    total=timeout, sock_connect=30, sock_read=timeout,
                ),
            ) as resp:
                if resp.status == 200:
                    while True:
                        raw = await resp.content.readline()
                        if not raw:
                            break
                        line = raw.decode("utf-8", errors="replace")
                        if not line.strip():
                            continue
                        tok, done = _parse_sse(line)
                        if done:
                            return
                        if tok:
                            yield tok
                    return

                text = await resp.text()
                last_err = f"HTTP {resp.status}: {text[:300]}"
                if resp.status in _FATAL:
                    raise RuntimeError(last_err)
                if resp.status in _RETRYABLE and attempt < retries:
                    wait = min(2.0 * (attempt + 1) + random.random(), 15)
                    log.warning(f"Retry {attempt+1}/{retries} in {wait:.1f}s — {last_err}")
                    await asyncio.sleep(wait)
                    continue
                raise RuntimeError(last_err)

        except (RuntimeError, GeneratorExit):
            raise
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as exc:
            last_err = str(exc)
            if attempt < retries:
                log.warning(f"Retry {attempt+1}/{retries} — {last_err}")
                await asyncio.sleep(1.5 * (attempt + 1))
                continue
            raise RuntimeError(f"Backend unreachable: {last_err}") from exc

    raise RuntimeError(f"All retries exhausted: {last_err}")


# ═══════════════════════════════════════════════════════════
# §6 — FASTAPI APP + LIFESPAN
# ═══════════════════════════════════════════════════════════

@asynccontextmanager
async def lifespan(app: FastAPI):
    # ── startup ───────────────────────────────────────
    connector = aiohttp.TCPConnector(
        resolver=aiohttp.resolver.ThreadedResolver(),
        limit=100,
        limit_per_host=15,
        ttl_dns_cache=300,
        keepalive_timeout=60,
        enable_cleanup_closed=True,
    )
    app.state.http = aiohttp.ClientSession(
        connector=connector,
        headers=_BE_HEADERS,
    )
    log.info("══════════════════════════════════════════")
    log.info(" ⚡ DevsDo API Server v1.0.0")
    log.info(f" Models : {len(Registry.all_cards())}")
    log.info(f" Backend: {_BACKEND}")
    log.info(" Port   : 7860")
    log.info("══════════════════════════════════════════")
    yield
    # ── shutdown ──────────────────────────────────────
    await app.state.http.close()
    log.info("Server stopped ✓")


app = FastAPI(
    title="⚡ DevsDo API",
    description="OpenAI-compatible · 54 Models · Streaming · Reasoning",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan,
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ═══════════════════════════════════════════════════════════
# §7 — PYDANTIC SCHEMAS
# ═══════════════════════════════════════════════════════════

class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    model: str = "kimi-k2.5"
    messages: list[Message] = Field(..., min_length=1)
    stream: bool = False
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    max_tokens: Optional[int] = Field(default=4096, ge=1)

# ═══════════════════════════════════════════════════════════
# §8 — ROUTES
# ═══════════════════════════════════════════════════════════

def _cid() -> str:
    """Generate a chat-completion ID."""
    return f"chatcmpl-{uuid.uuid4().hex[:29]}"


def _sse(obj: Any) -> str:
    """Format one SSE frame."""
    return f"data: {json.dumps(obj, ensure_ascii=False)}\n\n"


# ── info ──────────────────────────────────────────────────
@app.get("/")
async def root():
    return {
        "service": "⚡ DevsDo API",
        "version": "1.0.0",
        "status": "running",
        "models": len(Registry.all_cards()),
        "docs": "/docs",
        "endpoints": {
            "health": "GET /health",
            "models_openai": "GET /v1/models",
            "models_detail": "GET /api/internal/v1/models",
            "chat": "POST /v1/chat/completions",
        },
    }


@app.get("/health")
async def health():
    return {
        "status": "healthy",
        "timestamp": int(time.time()),
        "models": len(Registry.all_cards()),
        "backend": _BACKEND,
    }


# ── models ────────────────────────────────────────────────
@app.get("/v1/models")
async def models_openai():
    """OpenAI-compatible model list."""
    return Registry.openai_list()


@app.get("/api/internal/v1/models")
async def models_internal():
    """Rich model registry grouped by family."""
    return Registry.internal_list()
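# Illustrative request against the chat endpoint below. Assumes the server is
# running locally on port 7860 (see §11); any registered alias, backend ID, or
# fuzzy fragment is accepted as `model`:
#
#   curl -N http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "deepseek-r1", "stream": true,
#          "messages": [{"role": "user", "content": "Hello"}]}'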
# ── chat completions ──────────────────────────────────────
@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
    """
    OpenAI-compatible chat completions.

    • stream=false → JSON (reasoning in `reasoning_content`)
    • stream=true  → SSE  (reasoning chunks use `reasoning_content` in delta)
    """
    model_id = Registry.resolve(req.model)
    card = Registry.find(req.model)
    display = card.name if card else req.model
    msgs = [{"role": m.role, "content": m.content} for m in req.messages]

    if req.stream:
        return StreamingResponse(
            _stream_gen(app.state.http, msgs, model_id, display,
                        req.temperature, req.max_tokens or 4096),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no",
            },
        )

    return await _complete(
        app.state.http, msgs, model_id, display,
        req.temperature, req.max_tokens or 4096,
    )


# ═══════════════════════════════════════════════════════════
# §9 — SSE STREAM GENERATOR
#
# backend tokens → ThinkParser → OpenAI SSE chunks
#
# Reasoning tokens go into delta.reasoning_content
# Normal tokens go into delta.content
# ═══════════════════════════════════════════════════════════

async def _stream_gen(
    session: aiohttp.ClientSession,
    messages: list[dict],
    model_id: str,
    model_name: str,
    temperature: float,
    max_tokens: int,
) -> AsyncGenerator[str, None]:
    cid = _cid()
    ts = int(time.time())
    parser = ThinkParser()

    def _chunk(delta: dict, finish: Optional[str] = None) -> str:
        return _sse({
            "id": cid,
            "object": "chat.completion.chunk",
            "created": ts,
            "model": model_name,
            "choices": [{
                "index": 0,
                "delta": delta,
                "finish_reason": finish,
            }],
        })

    # ── role announcement ─────────────────────────────
    yield _chunk({"role": "assistant"})

    try:
        async for token in backend_stream(
            session, messages, model_id, temperature, max_tokens,
        ):
            for kind, text in parser.feed(token):
                if kind == "reasoning":
                    yield _chunk({"reasoning_content": text})
                else:
                    yield _chunk({"content": text})

        # ── flush parser buffer ───────────────────────
        for kind, text in parser.flush():
            if kind == "reasoning":
                yield _chunk({"reasoning_content": text})
            else:
                yield _chunk({"content": text})

        # ── stop ──────────────────────────────────────
        yield _chunk({}, finish="stop")
        yield "data: [DONE]\n\n"

    except Exception as exc:
        log.error(f"Stream error [{model_name}]: {exc}")
        yield _chunk({"content": f"\n\n[Error: {exc}]"}, finish="error")
        yield "data: [DONE]\n\n"


# ═══════════════════════════════════════════════════════════
# §10 — NON-STREAMING COLLECTOR
# ═══════════════════════════════════════════════════════════

async def _complete(
    session: aiohttp.ClientSession,
    messages: list[dict],
    model_id: str,
    model_name: str,
    temperature: float,
    max_tokens: int,
) -> dict:
    """Collect full response, separate reasoning vs content."""
    parser = ThinkParser()
    reasoning: list[str] = []
    content: list[str] = []

    try:
        async for token in backend_stream(
            session, messages, model_id, temperature, max_tokens,
        ):
            for kind, text in parser.feed(token):
                (reasoning if kind == "reasoning" else content).append(text)
        for kind, text in parser.flush():
            (reasoning if kind == "reasoning" else content).append(text)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Backend error: {exc}")

    msg: dict = {
        "role": "assistant",
        "content": "".join(content),
    }
    if reasoning:
        msg["reasoning_content"] = "".join(reasoning)

    total_chars = len(msg["content"]) + len(msg.get("reasoning_content", ""))
    return {
        "id": _cid(),
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model_name,
        "choices": [{
            "index": 0,
            "message": msg,
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": total_chars // 4,  # rough estimate
            "total_tokens": total_chars // 4,
        },
    }


# ═══════════════════════════════════════════════════════════
# §11 — ENTRYPOINT
# ═══════════════════════════════════════════════════════════

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
        workers=1,
        timeout_keep_alive=120,
        log_level="info",
    )