Spaces:
Running
Running
| """ | |
| NEXUS — Model Router & Inference Gateway | |
| OpenAI-compatible API with intelligent routing across 3 providers. | |
| Providers (priority order): | |
| 1. ki_fusion — ki-fusion-labs.de/v1 (primary, LM Studio via PHP) | |
| 2. hf_api — HuggingFace Serverless Inference (HF_TOKEN env) | |
| 3. local_cpu — transformers, Qwen2.5-0.5B on CPU (always-on fallback) | |
| Routing decisions based on: | |
| task_type : simple / reasoning / planning / code / vision / embedding | |
| complexity : 1-10 score from message analysis | |
| cost_mode : cheap / balanced / best | |
| provider : explicit override | |
| MCP tools: nexus_chat, nexus_route_info, nexus_stats, nexus_models, nexus_health | |
| """ | |
| import os, uuid, json, asyncio, time, re, math, logging | |
| from pathlib import Path | |
| from datetime import datetime, timezone | |
| from typing import Optional, AsyncGenerator | |
| import httpx | |
| from fastapi import FastAPI, HTTPException, Request | |
| from fastapi.responses import JSONResponse, HTMLResponse, StreamingResponse | |
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("nexus")
# Stats are persisted as JSON next to this source file.
BASE = Path(__file__).parent
STATS_FILE = BASE / "stats.json"
# ── Env ─────────────────────────────────────────────────────────────
# Primary provider: LM Studio exposed through a PHP proxy; key is optional.
KF_BASE = os.environ.get("KI_FUSION_URL", "https://ki-fusion-labs.de/v1")
KF_KEY = os.environ.get("KI_FUSION_KEY", "")
# HuggingFace serverless token; empty string disables the hf_api provider.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_BASE = "https://api-inference.huggingface.co/models"
HF_OAI = "https://api-inference.huggingface.co/v1"
# ── Model catalogue ─────────────────────────────────────────────────
# ki_fusion uses whatever LM Studio has loaded — model name is configurable
KF_DEFAULT_MODEL = os.environ.get("KF_MODEL", "lm-studio")
# Per-task model choices when routing to the HF serverless API.
HF_MODELS = {
    "simple": "Qwen/Qwen2.5-7B-Instruct",
    "reasoning": "meta-llama/Llama-3.1-8B-Instruct",
    "planning": "mistralai/Mistral-7B-Instruct-v0.3",
    "code": "Qwen/Qwen2.5-Coder-7B-Instruct",
    "vision": "Qwen/Qwen2.5-VL-7B-Instruct",
    "fast": "Qwen/Qwen2.5-0.5B-Instruct",
}
# Model used by the always-on local CPU fallback provider.
LOCAL_MODEL_ID = os.environ.get("LOCAL_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
| # ── Stats ───────────────────────────────────────────────────────── | |
def load_stats():
    """Load persisted routing stats from STATS_FILE, or return a fresh skeleton.

    Returns the parsed JSON dict when the file exists and is readable; any
    read or parse failure resets to zeroed counters so startup never crashes
    on a corrupt stats file.

    Fix: the original used a bare ``except:`` which also swallows
    KeyboardInterrupt/SystemExit — narrowed to (OSError, ValueError)
    (ValueError covers json.JSONDecodeError and UnicodeDecodeError).
    """
    if STATS_FILE.exists():
        try:
            return json.loads(STATS_FILE.read_text())
        except (OSError, ValueError):
            log.warning("stats.json unreadable — resetting stats")
    return {
        "total_requests": 0,
        "by_provider": {"ki_fusion": {"ok": 0, "fail": 0, "total_ms": 0, "tokens": 0},
                        "hf_api":    {"ok": 0, "fail": 0, "total_ms": 0, "tokens": 0},
                        "local_cpu": {"ok": 0, "fail": 0, "total_ms": 0, "tokens": 0}},
        "by_task": {"simple": 0, "reasoning": 0, "planning": 0, "code": 0,
                    "vision": 0, "embedding": 0, "unknown": 0},
        "by_cost": {"cheap": 0, "balanced": 0, "best": 0},
        "recent": [],  # last 20 routing decisions, newest first
    }
def save_stats(s):
    """Persist the stats dict *s* to STATS_FILE as pretty-printed JSON."""
    serialized = json.dumps(s, indent=2)
    STATS_FILE.write_text(serialized)
# Module-level stats state, loaded once at import time.
STATS = load_stats()
def record(provider, task, cost_mode, ok, ms, tokens, model, reason):
    """Fold one routing outcome into STATS and persist to disk.

    Args describe a single completed (or failed) inference attempt.

    Fix: the original indexed STATS["by_provider"][provider] directly,
    raising KeyError for any provider key not in the skeleton (e.g. the
    literal "none" recorded when routing fails before any attempt).
    Unknown keys are now created on the fly, like by_task/by_cost already do.
    """
    STATS["total_requests"] += 1
    p = STATS["by_provider"].setdefault(
        provider, {"ok": 0, "fail": 0, "total_ms": 0, "tokens": 0})
    if ok:
        p["ok"] += 1
    else:
        p["fail"] += 1
    p["total_ms"] += ms
    p["tokens"] += tokens
    STATS["by_task"][task] = STATS["by_task"].get(task, 0) + 1
    STATS["by_cost"][cost_mode] = STATS["by_cost"].get(cost_mode, 0) + 1
    entry = {
        "id": uuid.uuid4().hex[:8],
        "ts": int(time.time()),
        "provider": provider, "model": model,
        "task": task, "cost_mode": cost_mode,
        "ok": ok, "ms": ms, "tokens": tokens,
        "reason": reason,
    }
    # Prepend newest decision; keep only the 20 most recent.
    STATS["recent"] = ([entry] + STATS["recent"])[:20]
    save_stats(STATS)
| # ── Task classifier ─────────────────────────────────────────────── | |
# ── Task classifier ─────────────────────────────────────────────────
# Keyword regexes per task category. classify_task() checks categories in
# dict insertion order and returns the first category with a matching
# pattern; "simple" has no patterns and acts as the fallthrough default.
TASK_PATTERNS = {
    "vision": [r"\bimage\b",r"\bscreenshot\b",r"\bphoto\b",r"\bpicture\b",r"\bdescribe.{0,20}image\b",r"\bvision\b"],
    "code": [r"\bcode\b",r"\bfunction\b",r"\bclass\b",r"\bdebug\b",r"\bimplements?\b",r"\bpython\b",
             r"\bjavascript\b",r"\brefactor\b",r"\bscript\b",r"\bbug\b",r"\bsyntax\b"],
    "reasoning": [r"\bwhy\b",r"\bexplain\b",r"\banalyze\b",r"\banalyse\b",r"\breason\b",
                  r"\bprove\b",r"\bcompare\b",r"\bdifference\b",r"\badvantages?\b",r"\bthink\b"],
    "planning": [r"\bplan\b",r"\bstrategy\b",r"\bsteps?\b",r"\broadmap\b",r"\bschedule\b",
                 r"\bprioritize\b",r"\bworkflow\b",r"\barchitecture\b",r"\bdesign\b"],
    # "\bencod" has no trailing \b: prefix match covers encode/encoding/encoder.
    "embedding": [r"\bembed\b",r"\bvector\b",r"\bsimilarity\b",r"\bsemantic\b",r"\bencod"],
    "simple": [],  # fallthrough
}
def classify_task(messages: list) -> str:
    """Classify a chat into one of the TASK_PATTERNS categories.

    Any image_url content block forces "vision"; otherwise the first
    category (in TASK_PATTERNS order) whose regex matches the lowercased
    concatenated text wins. Defaults to "simple".
    """
    pieces = []
    for msg in messages:
        content = msg.get("content", "")
        if isinstance(content, str):
            pieces.append(content)
        else:
            pieces.append(" ".join(
                part.get("text", "")
                for part in msg.get("content", [])
                if isinstance(part, dict)))
    text = " ".join(pieces).lower()
    # Vision short-circuit: structured content containing an image block.
    for msg in messages:
        blocks = msg.get("content")
        if isinstance(blocks, list) and any(
                isinstance(part, dict) and part.get("type") == "image_url"
                for part in blocks):
            return "vision"
    for task, patterns in TASK_PATTERNS.items():
        if task == "simple":
            continue
        if any(re.search(pattern, text) for pattern in patterns):
            return task
    return "simple"
def score_complexity(messages: list) -> int:
    """Heuristic 1-10 complexity score for a chat.

    Signals: word count, multi-step phrasing, technical jargon density, and
    question count. Only plain-string message contents contribute
    (multimodal content blocks are ignored). Saturates at 10.

    Improvement: the original called text.lower() three separate times;
    the lowered text is now computed once.
    """
    text = " ".join(
        m.get("content", "") if isinstance(m.get("content"), str) else ""
        for m in messages
    )
    lowered = text.lower()  # hoisted: used by both regex and jargon checks
    score = 1
    words = len(text.split())
    if words > 50: score += 1
    if words > 150: score += 1
    if words > 400: score += 2
    # Multi-step indicators: "step 1", "first...then...finally", numbered lists.
    if re.search(r"\bstep\s*\d|first.*then.*finally|\d+\.\s+", lowered): score += 1
    # Technical density: up to +3 for jargon hits.
    tech_words = ("algorithm", "optimization", "architecture", "implement", "integrate",
                  "distributed", "concurrent", "neural", "transformer", "gradient", "latency")
    hits = sum(1 for w in tech_words if w in lowered)
    score += min(hits, 3)
    # Question count: each "?" adds a point, capped at +2.
    score += min(text.count("?"), 2)
    return min(score, 10)
| # ── Provider health ─────────────────────────────────────────────── | |
| provider_health = {"ki_fusion": True, "hf_api": bool(HF_TOKEN), "local_cpu": True} | |
| # Real model ID discovered from LM Studio at startup (or falls back to KF_DEFAULT_MODEL) | |
| _kf_actual_model: str = KF_DEFAULT_MODEL | |
async def probe_ki_fusion() -> bool:
    """Test ki_fusion with a minimal chat completion — /v1/models may not be available.

    Uses KF_MODEL env var as the model name (set it to your actual loaded
    model id). Side effect: sets provider_health["ki_fusion"] to the result.

    Returns:
        True if the endpoint answered with HTTP < 400, else False.
    """
    global _kf_actual_model
    _kf_actual_model = KF_DEFAULT_MODEL  # always use configured name, no discovery
    try:
        headers = {"Content-Type": "application/json"}
        if KF_KEY: headers["Authorization"] = f"Bearer {KF_KEY}"
        # 1-token ping: cheapest request that exercises the full inference path.
        payload = {
            "model": KF_DEFAULT_MODEL,
            "messages": [{"role": "user", "content": "ping"}],
            "max_tokens": 1,
            "temperature": 0.0,
        }
        # verify=False: self-hosted endpoint, TLS cert may be expired.
        # NOTE(review): URL has a trailing slash here but api_health() posts
        # without one — confirm which form the PHP proxy actually expects.
        async with httpx.AsyncClient(timeout=httpx.Timeout(None, connect=6.0, read=15.0), verify=False) as c:
            r = await c.post(f"{KF_BASE}/chat/completions/", headers=headers, json=payload)
            if r.status_code < 400:
                log.info(f"[NEXUS] ki_fusion online ✓ model={_kf_actual_model} url={KF_BASE}")
                provider_health["ki_fusion"] = True
                return True
            else:
                log.warning(f"[NEXUS] ki_fusion probe HTTP {r.status_code}: {r.text[:120]}")
                provider_health["ki_fusion"] = False
                return False
    except Exception as e:
        # Connect errors / timeouts land here; mark offline until next probe.
        log.warning(f"[NEXUS] ki_fusion probe failed: {e}")
        provider_health["ki_fusion"] = False
        return False
async def ki_fusion_watchdog():
    """Background task: probe ki_fusion every 30s to auto-recover after outages."""
    while True:
        await asyncio.sleep(30)
        previously_up = provider_health["ki_fusion"]
        currently_up = await probe_ki_fusion()
        if currently_up and not previously_up:
            log.info("[NEXUS] ki_fusion recovered — back online")
        elif previously_up and not currently_up:
            log.warning("[NEXUS] ki_fusion went offline")
| # ── Local CPU model (lazy) ──────────────────────────────────────── | |
# Lazily-created transformers pipeline; None until loaded (or after a failed load).
_local_pipe = None
# True only while a load attempt is in progress (guards duplicate loads).
_local_loading = False
def get_local_pipe():
    """Return the cached local text-generation pipeline, loading it on first call.

    Returns:
        The pipeline, or None when a load is already in progress elsewhere
        or the load failed (local_cpu is then marked unhealthy).

    NOTE(review): _local_loading is a plain bool, not a lock — concurrent
    executor threads could race past the guard; confirm callers serialize.
    """
    global _local_pipe, _local_loading
    if _local_pipe is not None:
        return _local_pipe
    if _local_loading:
        return None
    _local_loading = True
    try:
        # Imported lazily so the server still starts without transformers installed.
        from transformers import pipeline
        log.info(f"Loading local model {LOCAL_MODEL_ID} on CPU...")
        _local_pipe = pipeline(
            "text-generation",
            model=LOCAL_MODEL_ID,
            device="cpu",
            torch_dtype="auto",
            max_new_tokens=512,  # default; overridden per call in call_local_cpu
        )
        log.info("Local model loaded.")
    except Exception as e:
        # Any failure (download, OOM, missing deps) disables the provider.
        log.warning(f"Local model load failed: {e}")
        _local_pipe = None
        provider_health["local_cpu"] = False
    finally:
        _local_loading = False
    return _local_pipe
| # Pre-warm in background | |
async def warm_local():
    """Pre-load the local CPU model in a worker thread ~5s after startup.

    Fix: asyncio.get_event_loop() is deprecated inside coroutines since
    Python 3.10; get_running_loop() is the correct call here.
    """
    await asyncio.sleep(5)
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, get_local_pipe)
| # ── Router ──────────────────────────────────────────────────────── | |
# ── Router ──────────────────────────────────────────────────────────
def select_provider_and_model(task: str, complexity: int, cost_mode: str,
                              force_provider: str = "") -> tuple[str,str,str]:
    """Returns (provider, model, reason)

    Pure routing policy: consults only provider_health and the static model
    tables. Rules are checked strictly in order — the first match returns.
    """
    # Explicit override — honored only while that provider is healthy.
    if force_provider and provider_health.get(force_provider, False):
        model = _kf_actual_model if force_provider=="ki_fusion" else HF_MODELS.get(task, HF_MODELS["simple"])
        if force_provider == "local_cpu": model = LOCAL_MODEL_ID
        return force_provider, model, f"explicit override to {force_provider}"
    # Vision always -> HF (vision models)
    if task == "vision":
        if provider_health["hf_api"]:
            return "hf_api", HF_MODELS["vision"], "vision task -> HF Qwen2.5-VL"
        if provider_health["ki_fusion"]:
            return "ki_fusion", _kf_actual_model, "vision fallback -> ki_fusion"
        # Neither healthy: falls through to the generic rules below.
    # Embedding -> HF (no dedicated fallback; falls through if HF is down)
    if task == "embedding":
        if provider_health["hf_api"]:
            return "hf_api", "sentence-transformers/all-MiniLM-L6-v2", "embedding -> HF sentence-transformers"
    # Cost mode: cheap -> prefer HF or local, but only for easy simple asks.
    if cost_mode == "cheap":
        if task == "simple" and complexity <= 4:
            if provider_health["hf_api"]:
                return "hf_api", HF_MODELS["fast"], f"cheap+simple(c={complexity}) -> HF fast"
            if provider_health["local_cpu"]:
                return "local_cpu", LOCAL_MODEL_ID, f"cheap+simple -> local CPU"
    # Cost mode: best -> ki_fusion first (your own GPU)
    if cost_mode == "best":
        if provider_health["ki_fusion"]:
            return "ki_fusion", _kf_actual_model, f"best mode -> ki_fusion (LM Studio)"
    # Balanced routing by task + complexity
    if task in ("planning",) and complexity >= 6:
        if provider_health["ki_fusion"]:
            return "ki_fusion", _kf_actual_model, f"planning+complex(c={complexity}) -> ki_fusion"
    if task == "code":
        if provider_health["ki_fusion"]:
            return "ki_fusion", _kf_actual_model, f"code task -> ki_fusion (LM Studio)"
        if provider_health["hf_api"]:
            return "hf_api", HF_MODELS["code"], "code -> HF Qwen2.5-Coder"
    if task == "reasoning" and complexity >= 7:
        if provider_health["ki_fusion"]:
            return "ki_fusion", _kf_actual_model, f"hard reasoning(c={complexity}) -> ki_fusion"
    # Default balanced: HF for most tasks (free tier, good quality)
    if provider_health["hf_api"]:
        hf_model = HF_MODELS.get(task, HF_MODELS["simple"])
        return "hf_api", hf_model, f"{task}(c={complexity}) -> HF {hf_model.split('/')[-1]}"
    # Fallback: ki_fusion
    if provider_health["ki_fusion"]:
        return "ki_fusion", _kf_actual_model, f"fallback -> ki_fusion"
    # Last resort: local CPU — returned even if previously marked unhealthy.
    return "local_cpu", LOCAL_MODEL_ID, "last resort -> local CPU"
| # ── Inference calls ─────────────────────────────────────────────── | |
async def call_ki_fusion(messages, model, max_tokens=1024, temperature=0.7, stream=False):
    """POST a chat completion to ki_fusion.

    Async generator: yields raw bytes chunks when stream=True, otherwise
    yields a single parsed response dict. Raises httpx.HTTPStatusError on
    HTTP errors.
    """
    headers = {"Content-Type": "application/json"}
    if KF_KEY:
        headers["Authorization"] = f"Bearer {KF_KEY}"
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": stream,
    }
    url = f"{KF_BASE}/chat/completions/"
    # verify=False: ki-fusion-labs.de SSL cert may be expired (self-hosted).
    # Fast-fail connect: 6s tells us immediately if your server is off.
    timeout = httpx.Timeout(None, connect=6.0, read=90.0, write=10.0, pool=5.0)
    async with httpx.AsyncClient(timeout=timeout, verify=False) as client:
        if not stream:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            yield response.json()
        else:
            async with client.stream("POST", url, headers=headers, json=payload) as resp:
                resp.raise_for_status()
                async for chunk in resp.aiter_bytes():
                    yield chunk
async def call_hf_api(messages, model, max_tokens=1024, temperature=0.7, stream=False):
    """POST a chat completion to the HF OpenAI-compatible serverless endpoint.

    Async generator: yields raw bytes chunks when stream=True, otherwise a
    single parsed response dict. Raises when HF_TOKEN is unset or on HTTP error.
    """
    if not HF_TOKEN:
        raise Exception("HF_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": stream,
    }
    # HF OpenAI-compatible endpoint
    url = f"{HF_OAI}/chat/completions"
    async with httpx.AsyncClient(timeout=90) as client:
        if not stream:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            yield response.json()
        else:
            async with client.stream("POST", url, headers=headers, json=payload) as resp:
                resp.raise_for_status()
                async for chunk in resp.aiter_bytes():
                    yield chunk
async def call_local_cpu(messages, model, max_tokens=512, temperature=0.7, stream=False):
    """Run inference on the local CPU transformers pipeline.

    Always yields exactly one OpenAI-style chat.completion dict. `stream` is
    accepted for signature parity but not honored (no token streaming), and
    `model` is ignored — the pipeline is always LOCAL_MODEL_ID.

    Fix: use asyncio.get_running_loop() instead of the deprecated
    get_event_loop() inside a coroutine (matches warm_local).
    """
    loop = asyncio.get_running_loop()
    # If model is still loading (_local_loading=True), wait up to 90s
    # instead of failing immediately. This is the guaranteed last-resort provider.
    waited = 0
    while _local_loading and waited < 90:
        log.info(f"[local_cpu] Model still loading, waiting… ({waited}s)")
        await asyncio.sleep(3)
        waited += 3
    # If not loaded yet, trigger a load attempt now (synchronously in thread)
    if not _local_pipe and not _local_loading:
        log.info("[local_cpu] Triggering model load now (first request)")
        await loop.run_in_executor(None, get_local_pipe)
    def _run():
        # Runs in a worker thread: the transformers call is blocking.
        pipe = get_local_pipe()
        if not pipe:
            raise Exception("Local model not available — transformers load failed. Check logs for OOM or missing dependencies.")
        # Build chat-format prompt; non-string (multimodal) contents are dropped.
        chat_messages = [{"role": m.get("role","user"),
                          "content": m.get("content","") if isinstance(m.get("content"), str) else ""}
                         for m in messages]
        result = pipe(chat_messages, max_new_tokens=max_tokens, do_sample=temperature > 0,
                      temperature=max(temperature, 0.01), pad_token_id=pipe.tokenizer.eos_token_id)
        if result and result[0]:
            generated = result[0].get("generated_text", "")
            if isinstance(generated, list):
                # Chat format: last message is the new assistant response
                last = generated[-1] if generated else {}
                content = last.get("content","") if isinstance(last, dict) else str(last)
            else:
                content = str(generated)
            # Strip prompt echo (plain-text generation mode)
            prompt_text = " ".join(m.get("content","") for m in messages if isinstance(m.get("content"),str))
            if content.startswith(prompt_text):
                content = content[len(prompt_text):].strip()
            return content
        return ""
    content = await loop.run_in_executor(None, _run)
    # Token counts approximated by whitespace word count (no tokenizer pass).
    response = {
        "id": f"local-{uuid.uuid4().hex[:8]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": LOCAL_MODEL_ID,
        "choices": [{"index": 0,
                     "message": {"role": "assistant", "content": content},
                     "finish_reason": "stop"}],
        "usage": {"prompt_tokens": 0, "completion_tokens": len(content.split()),
                  "total_tokens": len(content.split())}
    }
    yield response
| # ── Core route function ─────────────────────────────────────────── | |
# ── Core route function ─────────────────────────────────────────────
async def route_inference(messages: list, max_tokens: int = 1024, temperature: float = 0.7,
                          cost_mode: str = "balanced", force_provider: str = "",
                          force_model: str = "", stream: bool = False):
    """Classify, route, and execute a chat completion with provider fallback.

    Returns either the provider's response dict (annotated with a "_nexus"
    metadata key), or — when stream=True — a {"_stream": True, "_gen": ...,
    "_meta": ...} wrapper holding an async byte generator.

    Raises:
        HTTPException(503) when every provider in the chain fails.
    """
    task = classify_task(messages)
    complexity = score_complexity(messages)
    provider, model, reason = select_provider_and_model(task, complexity, cost_mode, force_provider)
    if force_model: model = force_model
    t0 = time.time()
    tokens = 0
    ok = True  # NOTE(review): only flipped at the very end; carries no routing state
    tried = []
    providers_to_try = [provider]
    # Build fallback chain: ki_fusion -> hf_api can be skipped if health=False,
    # but local_cpu is ALWAYS added last — it's the guaranteed offline fallback.
    for fb in ["ki_fusion", "hf_api"]:
        if fb not in providers_to_try and provider_health.get(fb, True):
            providers_to_try.append(fb)
    # local_cpu: always last, always tried — never skip it
    if "local_cpu" not in providers_to_try:
        providers_to_try.append("local_cpu")
    last_err = None
    for p in providers_to_try:
        tried.append(p)
        try:
            fb_model = model
            # NOTE(review): on an hf_api/local_cpu fallback the per-task model
            # replaces any force_model the caller supplied — confirm intended.
            if p == "ki_fusion": caller = call_ki_fusion
            elif p == "hf_api": caller = call_hf_api; fb_model = HF_MODELS.get(task, HF_MODELS["simple"])
            else: caller = call_local_cpu; fb_model = LOCAL_MODEL_ID
            if p != provider:
                reason += f" | fallback to {p}"
            if stream:
                # Streaming: hand the generator back to the HTTP layer.
                async def _stream_gen():
                    async for chunk in caller(messages, fb_model, max_tokens, temperature, stream=True):
                        yield chunk
                ms = int((time.time()-t0)*1000)
                # NOTE(review): success is recorded before the stream is consumed;
                # mid-stream failures are not reflected in stats or fallback.
                record(p, task, cost_mode, True, ms, 0, fb_model, reason)
                return {
                    "_stream": True,
                    "_gen": _stream_gen(),
                    "_meta": {"provider":p,"model":fb_model,"task":task,
                              "complexity":complexity,"reason":reason}
                }
            # Non-streaming: callers are async generators yielding one dict.
            result = None
            async for r in caller(messages, fb_model, max_tokens, temperature, stream=False):
                result = r
                break
            ms = int((time.time()-t0)*1000)
            if isinstance(result, dict):
                tokens = result.get("usage",{}).get("total_tokens", 0)
                # Attach routing metadata for transparency headers downstream.
                result.setdefault("_nexus", {})
                result["_nexus"] = {"provider":p,"model":fb_model,"task":task,
                                    "complexity":complexity,"reason":reason,
                                    "latency_ms":ms,"fallback_chain":tried}
            record(p, task, cost_mode, True, ms, tokens, fb_model, reason)
            return result
        except Exception as e:
            last_err = str(e)
            log.error(f"[NEXUS] Provider '{p}' FAILED: {last_err}")
            # Mark unhealthy — watchdog will re-probe every 30s and restore when live again
            if p != "local_cpu":
                provider_health[p] = False
    # Every provider in the chain raised.
    ok = False
    ms = int((time.time()-t0)*1000)
    record(tried[-1] if tried else "none", task, cost_mode, False, ms, 0, model, reason)
    raise HTTPException(503, f"All providers failed. Last error: {last_err}")
| # ── FastAPI ─────────────────────────────────────────────────────── | |
# ── FastAPI ─────────────────────────────────────────────────────────
app = FastAPI(title="NEXUS Model Router")
async def startup():
    """Startup hook: warm the local model and begin ki_fusion health probing.

    NOTE(review): no @app.on_event / route decorators are visible in this
    chunk — confirm startup() and the handlers are registered elsewhere
    in the file (e.g. via app.add_event_handler / app.add_api_route).
    """
    asyncio.create_task(warm_local())
    # Probe ki_fusion immediately — discover actual model, set health state
    asyncio.create_task(probe_ki_fusion())
    # Keep probing every 30s so recovery after outage is automatic
    asyncio.create_task(ki_fusion_watchdog())
# Small helper: JSON response with explicit status code.
def jresp(data, status=200): return JSONResponse(content=data, status_code=status)
| # ── OpenAI-compatible endpoints ─────────────────────────────────── | |
async def oai_chat(request: Request):
    """OpenAI-compatible chat-completions handler with nexus routing extensions.

    Extensions read from the body: cost_mode, provider. A provider name
    passed in the "model" field is treated as a provider override.
    """
    body = await request.json()
    messages = body.get("messages", [])
    stream = body.get("stream", False)
    cost_mode = body.get("cost_mode", "balanced")  # nexus extension
    force_prov = body.get("provider", "")          # nexus extension
    force_model = body.get("model", "")
    # Detect if model is actually a provider name
    if force_model in ("ki_fusion", "hf_api", "local_cpu"):
        force_prov, force_model = force_model, ""
    result = await route_inference(
        messages,
        body.get("max_tokens", 1024),
        body.get("temperature", 0.7),
        cost_mode,
        force_prov,
        force_model,
        stream,
    )
    if isinstance(result, dict) and result.get("_stream"):
        meta = result["_meta"]
        return StreamingResponse(
            result["_gen"],
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no",
                     "X-Nexus-Provider": meta["provider"],
                     "X-Nexus-Task": meta["task"]})
    # Add headers for transparency.
    # HTTP headers must be latin-1 — strip any unicode (e.g. -> arrows in reason strings)
    def h(v): return str(v).encode('latin-1', errors='replace').decode('latin-1')
    nexus = result.get("_nexus", {})
    return JSONResponse(content=result, headers={
        "X-Nexus-Provider": h(nexus.get("provider", "")),
        "X-Nexus-Task": h(nexus.get("task", "")),
        "X-Nexus-Reason": h(nexus.get("reason", "")[:120]),
    })
async def oai_models():
    """List routable pseudo-models plus concrete provider models (OpenAI list format)."""
    models = [
        {"id":"nexus-auto","object":"model","owned_by":"nexus","description":"Auto-routed"},
        {"id":"nexus-cheap","object":"model","owned_by":"nexus","description":"Cost-optimized routing"},
        {"id":"nexus-best","object":"model","owned_by":"nexus","description":"Best-quality routing"},
        {"id":"ki_fusion","object":"model","owned_by":"ki-fusion-labs","description":f"Primary LM Studio ({_kf_actual_model})"},
        {"id":_kf_actual_model,"object":"model","owned_by":"ki-fusion-labs","description":"Ki-Fusion loaded model"},
        {"id":"hf_api","object":"model","owned_by":"huggingface","description":"HF Serverless Inference"},
        {"id":"local_cpu","object":"model","owned_by":"local","description":f"Local CPU: {LOCAL_MODEL_ID}"},
    ]
    models.extend(
        {"id": model_id, "object": "model", "owned_by": "huggingface",
         "description": f"HF {kind} model"}
        for kind, model_id in HF_MODELS.items()
    )
    return jresp({"object": "list", "data": models})
| # ── Nexus-specific API ──────────────────────────────────────────── | |
# ── Nexus-specific API ──────────────────────────────────────────────
async def api_route(request: Request):
    """Route with full metadata returned.

    Accepts either a full "messages" array or a bare "prompt" string.
    """
    body = await request.json()
    fallback_messages = [{"role": "user", "content": body.get("prompt", "")}]
    messages = body.get("messages", fallback_messages)
    result = await route_inference(
        messages,
        body.get("max_tokens", 512),
        body.get("temperature", 0.7),
        body.get("cost_mode", "balanced"),
        body.get("provider", ""),
    )
    return jresp(result)
async def api_classify(request: Request):
    """Dry-run the classifier and router for a prompt without running inference."""
    body = await request.json()
    fallback_messages = [{"role": "user", "content": body.get("prompt", "")}]
    messages = body.get("messages", fallback_messages)
    task = classify_task(messages)
    complexity = score_complexity(messages)
    provider, model, reason = select_provider_and_model(
        task, complexity, body.get("cost_mode", "balanced"))
    return jresp({
        "task": task,
        "complexity": complexity,
        "selected_provider": provider,
        "selected_model": model,
        "reason": reason,
    })
async def api_stats():
    """Return routing stats plus derived per-provider aggregates.

    Fix: the original used STATS.copy(), a *shallow* copy — writing the
    derived fields (total / success_rate / avg_ms) mutated the nested dicts
    of the live STATS object, and those fields were then persisted to
    stats.json on the next record(). A JSON round-trip gives a deep copy
    (STATS is JSON-serializable by construction — it is persisted as JSON).
    """
    s = json.loads(json.dumps(STATS))
    # Compute per-provider aggregates on the detached copy only.
    for p, d in s["by_provider"].items():
        total = d["ok"] + d["fail"]
        d["total"] = total
        d["success_rate"] = round(d["ok"] / total * 100, 1) if total else 0
        d["avg_ms"] = round(d["total_ms"] / d["ok"], 0) if d["ok"] else 0
    return jresp(s)
async def api_health():
    """Live provider health report; also updates provider_health as a side effect.

    ki_fusion gets a real 1-token round-trip; hf_api only checks token
    presence; local_cpu only inspects the in-process pipeline state.
    """
    checks = {}
    # ki_fusion: ping via chat/completions (v1/models may not be available on PHP proxy)
    try:
        headers = {"Content-Type": "application/json"}
        if KF_KEY: headers["Authorization"] = f"Bearer {KF_KEY}"
        payload = {"model": _kf_actual_model, "messages": [{"role":"user","content":"ping"}],
                   "max_tokens": 1, "temperature": 0.0}
        # NOTE(review): posts to ".../chat/completions" (no trailing slash) while
        # probe_ki_fusion() uses a trailing slash — confirm both are accepted.
        async with httpx.AsyncClient(timeout=httpx.Timeout(None, connect=6.0, read=8.0), verify=False) as c:
            r = await c.post(f"{KF_BASE}/chat/completions", headers=headers, json=payload)
        checks["ki_fusion"] = {
            "ok": r.status_code < 400,
            "http_status": r.status_code,
            "model": _kf_actual_model,
            "url": KF_BASE,
            "error": r.text[:120] if r.status_code >= 400 else None,
        }
        provider_health["ki_fusion"] = r.status_code < 400
    except Exception as e:
        # Connect failures / timeouts: mark offline with a truncated error.
        checks["ki_fusion"] = {"ok": False, "error": str(e)[:120], "model": _kf_actual_model}
        provider_health["ki_fusion"] = False
    # HF: token presence only — no network round-trip.
    checks["hf_api"] = {"ok": bool(HF_TOKEN), "status": "token configured" if HF_TOKEN else "HF_TOKEN not set"}
    # Local: pipeline object state only.
    checks["local_cpu"] = {"ok": _local_pipe is not None,
                           "status": "loaded" if _local_pipe else ("loading" if _local_loading else "not loaded")}
    return jresp(checks)
async def toggle_provider(provider: str, request: Request):
    """Enable/disable a provider. Body: {"enabled": bool}; omit to flip the flag."""
    if provider not in provider_health:
        raise HTTPException(404)
    body = await request.json()
    new_state = body.get("enabled", not provider_health[provider])
    provider_health[provider] = new_state
    return jresp({"provider": provider, "enabled": new_state})
| # ── MCP ─────────────────────────────────────────────────────────── | |
# ── MCP ─────────────────────────────────────────────────────────────
# Tool manifest advertised over MCP; names must match the dispatch in mcp_call().
MCP_TOOLS = [
    {"name":"nexus_chat","description":"Send a chat completion through the NEXUS router. Auto-selects best provider.",
     "inputSchema":{"type":"object","required":["messages"],"properties":{
         "messages": {"type":"array","items":{"type":"object"}},
         "max_tokens": {"type":"integer","default":1024},
         "temperature":{"type":"number","default":0.7},
         "cost_mode": {"type":"string","enum":["cheap","balanced","best"],"default":"balanced"},
         "provider": {"type":"string","enum":["","ki_fusion","hf_api","local_cpu"]},
     }}},
    {"name":"nexus_route_info","description":"Predict routing for a prompt without running inference.",
     "inputSchema":{"type":"object","required":["prompt"],"properties":{
         "prompt": {"type":"string"},
         "cost_mode": {"type":"string","default":"balanced"},
     }}},
    {"name":"nexus_stats","description":"Get routing statistics and provider performance.",
     "inputSchema":{"type":"object","properties":{}}},
    {"name":"nexus_models","description":"List all available models and providers.",
     "inputSchema":{"type":"object","properties":{}}},
    {"name":"nexus_health","description":"Check provider health and availability.",
     "inputSchema":{"type":"object","properties":{}}},
]
async def mcp_call(name, args):
    """Dispatch an MCP tool invocation by name; always returns a JSON string."""
    if name == "nexus_chat":
        chat_result = await route_inference(
            args["messages"],
            args.get("max_tokens", 1024),
            args.get("temperature", 0.7),
            args.get("cost_mode", "balanced"),
            args.get("provider", ""),
        )
        return json.dumps(chat_result)
    if name == "nexus_route_info":
        msgs = [{"role": "user", "content": args["prompt"]}]
        task = classify_task(msgs)
        comp = score_complexity(msgs)
        prov, mdl, why = select_provider_and_model(task, comp, args.get("cost_mode", "balanced"))
        return json.dumps({"task": task, "complexity": comp,
                           "provider": prov, "model": mdl, "reason": why})
    if name == "nexus_stats":
        return json.dumps(STATS)
    if name == "nexus_models":
        return json.dumps({"hf_models": HF_MODELS,
                           "local_model": LOCAL_MODEL_ID,
                           "ki_fusion_model": _kf_actual_model})
    if name == "nexus_health":
        return json.dumps(provider_health)
    return json.dumps({"error": f"unknown: {name}"})
async def mcp_sse():
    """SSE endpoint: emits MCP init + tools-changed notices, then 25s keepalive pings."""
    def frame(obj):
        return f"data: {json.dumps(obj)}\n\n"
    async def stream():
        yield frame({"jsonrpc": "2.0", "method": "notifications/initialized",
                     "params": {"serverInfo": {"name": "nexus-router", "version": "1.0"},
                                "capabilities": {"tools": {}}}})
        await asyncio.sleep(0.1)
        yield frame({"jsonrpc": "2.0", "method": "notifications/tools/list_changed", "params": {}})
        while True:
            await asyncio.sleep(25)
            yield frame({"jsonrpc": "2.0", "method": "ping"})
    return StreamingResponse(stream(), media_type="text/event-stream",
                             headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
async def mcp_rpc(request: Request):
    """JSON-RPC endpoint implementing MCP initialize / tools/list / tools/call."""
    body = await request.json()
    method = body.get("method", "")
    rid = body.get("id", 1)
    if method == "initialize":
        init_result = {"serverInfo": {"name": "nexus-router", "version": "1.0"},
                       "capabilities": {"tools": {}}}
        return jresp({"jsonrpc": "2.0", "id": rid, "result": init_result})
    if method == "tools/list":
        return jresp({"jsonrpc": "2.0", "id": rid, "result": {"tools": MCP_TOOLS}})
    if method == "tools/call":
        params = body.get("params", {})
        text = await mcp_call(params.get("name", ""), params.get("arguments", {}))
        return jresp({"jsonrpc": "2.0", "id": rid,
                      "result": {"content": [{"type": "text", "text": text}]}})
    return jresp({"jsonrpc": "2.0", "id": rid,
                  "error": {"code": -32601, "message": "not found"}})
| # ── SPA ─────────────────────────────────────────────────────────── | |
# ── SPA ─────────────────────────────────────────────────────────────
async def ui():
    """Serve the embedded single-page dashboard (SPA string defined below)."""
    return HTMLResponse(content=SPA, media_type="text/html; charset=utf-8")
| SPA = """<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width,initial-scale=1"> | |
| <title>NEXUS — Model Router</title> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&display=swap" rel="stylesheet"> | |
| <style> | |
| :root{ | |
| --bg:#08080f;--s1:#0f0f1a;--s2:#141422;--bd:#1a1a2e;--bd2:#20203a; | |
| --acc:#ff6b00;--acc2:#ff9500;--txt:#d8d8f0;--sub:#4a4a70;--dim:#1e1e38; | |
| --p1:#0ea5e9;--p2:#7c3aed;--p3:#2ed573; | |
| --cr:#ff2244;--lo:#2ed573;--font:'Space Mono',monospace; | |
| --cheap:#2ed573;--balanced:#0ea5e9;--best:#ff9500; | |
| --simple:#5a5a80;--reasoning:#7c3aed;--planning:#ff6b00; | |
| --code:#0ea5e9;--vision:#ff6b9d;--embedding:#2ed573; | |
| } | |
| *{box-sizing:border-box;margin:0;padding:0;} | |
| html,body{height:100%;overflow:hidden;} | |
| body{font-family:var(--font);background:var(--bg);color:var(--txt); | |
| display:flex;flex-direction:column;height:100vh;} | |
| body::after{content:'';position:fixed;inset:0;pointer-events:none; | |
| background:repeating-linear-gradient(0deg,transparent,transparent 3px,rgba(255,107,0,.005) 3px,rgba(255,107,0,.005) 4px);} | |
| /* HEADER */ | |
| #hdr{flex-shrink:0;display:flex;align-items:center;padding:.75rem 1.6rem;gap:1rem; | |
| border-bottom:1px solid var(--bd);background:linear-gradient(180deg,#0d0d1a,var(--bg));z-index:10;} | |
| #logo{font-size:1.25rem;font-weight:700;letter-spacing:2px; | |
| background:linear-gradient(90deg,var(--acc),var(--p1)); | |
| -webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text;} | |
| #logo-sub{font-size:.5rem;color:var(--sub);letter-spacing:.26em;text-transform:uppercase;margin-top:2px;} | |
| #hdr-stats{display:flex;gap:.45rem;flex:1;flex-wrap:wrap;} | |
| .hs{display:flex;align-items:center;gap:.35rem;background:var(--s1);border:1px solid var(--bd); | |
| border-radius:4px;padding:.22rem .5rem;font-size:.54rem;color:var(--sub);} | |
| .hs-n{font-size:.85rem;font-weight:700;line-height:1;} | |
| .provider-dot{width:8px;height:8px;border-radius:50%;flex-shrink:0;} | |
| .pulse{animation:pulse 2s infinite;} | |
| @keyframes pulse{0%,100%{opacity:1}50%{opacity:.25}} | |
| /* MAIN LAYOUT */ | |
| #main{flex:1;display:flex;min-height:0;overflow:hidden;} | |
| /* LEFT PANEL: playground */ | |
| #left{width:480px;flex-shrink:0;display:flex;flex-direction:column; | |
| border-right:1px solid var(--bd);overflow:hidden;} | |
| #left-hdr{flex-shrink:0;padding:.6rem 1rem;border-bottom:1px solid var(--bd); | |
| background:var(--s1);display:flex;align-items:center;justify-content:space-between;} | |
| #left-hdr-title{font-size:.62rem;font-weight:700;letter-spacing:.12em;color:var(--acc);} | |
| #left-body{flex:1;overflow-y:auto;padding:.9rem 1rem;} | |
| #left-body::-webkit-scrollbar{width:3px;} | |
| #left-body::-webkit-scrollbar-thumb{background:var(--bd2);} | |
| /* PLAYGROUND */ | |
| .pg-section{margin-bottom:.8rem;} | |
| .pg-label{font-size:.5rem;color:var(--sub);text-transform:uppercase;letter-spacing:.12em;margin-bottom:.22rem;} | |
| #pg-prompt{width:100%;background:var(--s2);border:1px solid var(--bd2);border-radius:6px; | |
| padding:.55rem .7rem;font-family:var(--font);font-size:.7rem;color:var(--txt);outline:none; | |
| min-height:80px;resize:vertical;line-height:1.55;transition:border-color .12s;} | |
| #pg-prompt:focus{border-color:var(--acc);} | |
| .ctrl-row{display:grid;grid-template-columns:1fr 1fr 1fr;gap:.45rem;} | |
| .ctl{display:flex;flex-direction:column;gap:.18rem;} | |
| .ctl label{font-size:.48rem;color:var(--sub);text-transform:uppercase;letter-spacing:.1em;} | |
| .ctl select,.ctl input{background:var(--s2);border:1px solid var(--bd2);border-radius:4px; | |
| padding:.32rem .5rem;font-family:var(--font);font-size:.65rem;color:var(--txt);outline:none; | |
| transition:border-color .12s;} | |
| .ctl select:focus,.ctl input:focus{border-color:var(--acc);} | |
| .ctl select option{background:var(--s2);} | |
| #btn-send{width:100%;background:var(--acc);color:#000;border:none;padding:.52rem; | |
| font-family:var(--font);font-size:.7rem;font-weight:700;letter-spacing:.1em; | |
| text-transform:uppercase;border-radius:5px;cursor:pointer;transition:background .1s,transform .08s; | |
| margin-top:.35rem;} | |
| #btn-send:hover{background:var(--acc2);transform:translateY(-1px);} | |
| #btn-send:disabled{background:var(--dim);color:var(--sub);cursor:not-allowed;transform:none;} | |
| #btn-classify{width:100%;background:var(--s2);color:var(--sub);border:1px dashed var(--bd2); | |
| padding:.38rem;font-family:var(--font);font-size:.62rem;letter-spacing:.1em;text-transform:uppercase; | |
| border-radius:5px;cursor:pointer;transition:all .1s;margin-top:.25rem;} | |
| #btn-classify:hover{border-color:var(--p2);color:var(--p2);} | |
| /* ROUTING PREVIEW */ | |
| #route-preview{background:var(--s1);border:1px solid var(--bd);border-radius:7px; | |
| padding:.65rem .8rem;margin-top:.6rem;display:none;} | |
| #route-preview.show{display:block;} | |
| .rp-row{display:flex;align-items:center;gap:.4rem;margin-bottom:.3rem;flex-wrap:wrap;} | |
| .rp-badge{font-size:.5rem;padding:2px 7px;border-radius:3px;font-weight:700;text-transform:uppercase;letter-spacing:.08em;} | |
| .rp-arrow{font-size:.65rem;color:var(--sub);} | |
| .rp-reason{font-size:.56rem;color:var(--sub);margin-top:.35rem;line-height:1.5;} | |
| /* RESPONSE */ | |
| #response-box{background:var(--s1);border:1px solid var(--bd);border-radius:7px; | |
| padding:.75rem .9rem;margin-top:.65rem;display:none;} | |
| #response-box.show{display:block;} | |
| #response-meta{display:flex;gap:.35rem;align-items:center;flex-wrap:wrap;margin-bottom:.55rem; | |
| padding-bottom:.45rem;border-bottom:1px solid var(--bd);} | |
| .r-badge{font-size:.48rem;padding:2px 7px;border-radius:3px;font-weight:700;} | |
| #response-text{font-size:.7rem;color:var(--txt);line-height:1.68;white-space:pre-wrap; | |
| max-height:260px;overflow-y:auto;word-break:break-word;} | |
| #response-text::-webkit-scrollbar{width:3px;} | |
| #response-text::-webkit-scrollbar-thumb{background:var(--bd2);} | |
| /* RIGHT PANEL */ | |
| #right{flex:1;display:flex;flex-direction:column;overflow:hidden;} | |
| #tabs{flex-shrink:0;display:flex;border-bottom:1px solid var(--bd);background:var(--s1);} | |
| .tab{padding:.52rem 1.1rem;font-size:.6rem;font-weight:700;letter-spacing:.1em; | |
| cursor:pointer;color:var(--sub);border-bottom:2px solid transparent;transition:all .12s;} | |
| .tab.on{color:var(--acc);border-bottom-color:var(--acc);} | |
| /* PANELS */ | |
| .panel{flex:1;overflow-y:auto;padding:1.1rem 1.4rem;display:none;} | |
| .panel::-webkit-scrollbar{width:4px;} | |
| .panel::-webkit-scrollbar-thumb{background:var(--bd2);} | |
| .panel.on{display:block;} | |
| /* ROUTING LOG */ | |
| .log-entry{background:var(--s1);border:1px solid var(--bd);border-radius:7px; | |
| padding:.6rem .75rem;margin-bottom:.4rem;animation:cin .15s ease;} | |
| @keyframes cin{from{opacity:0;transform:translateY(3px)}to{opacity:1;transform:none}} | |
| .le-top{display:flex;align-items:center;gap:.38rem;margin-bottom:.3rem;flex-wrap:wrap;} | |
| .le-id{font-size:.48rem;color:var(--sub);font-family:monospace;} | |
| .le-provider{font-size:.5rem;padding:1px 6px;border-radius:3px;font-weight:700;text-transform:uppercase;} | |
| .le-task{font-size:.5rem;padding:1px 6px;border-radius:3px;font-weight:700;} | |
| .le-status{font-size:.5rem;padding:1px 5px;border-radius:3px;} | |
| .le-ok{background:#02130a;color:var(--lo);border:1px solid rgba(46,213,115,.2);} | |
| .le-fail{background:#1a0308;color:var(--cr);border:1px solid rgba(255,34,68,.2);} | |
| .le-ms{font-size:.52rem;color:var(--sub);margin-left:auto;} | |
| .le-reason{font-size:.57rem;color:var(--sub);line-height:1.45;} | |
| .le-model{font-size:.5rem;color:var(--dim);margin-top:.2rem;font-family:monospace;} | |
| /* STATS GRID */ | |
| .stats-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:.7rem;margin-bottom:1rem;} | |
| .stat-card{background:var(--s1);border:1px solid var(--bd);border-radius:8px;padding:.75rem .9rem;} | |
| .sc-title{font-size:.5rem;color:var(--sub);text-transform:uppercase;letter-spacing:.12em;margin-bottom:.45rem; | |
| display:flex;align-items:center;gap:.4rem;} | |
| .sc-n{font-size:1.4rem;font-weight:700;line-height:1;color:var(--txt);} | |
| .sc-sub{font-size:.52rem;color:var(--sub);margin-top:.2rem;} | |
| .bar-container{margin-top:.5rem;} | |
| .bar-row{display:flex;align-items:center;gap:.4rem;margin-bottom:.28rem;} | |
| .bar-label{font-size:.5rem;color:var(--sub);width:70px;flex-shrink:0;text-transform:capitalize;} | |
| .bar-track{flex:1;height:5px;background:var(--bd2);border-radius:3px;overflow:hidden;} | |
| .bar-fill{height:100%;border-radius:3px;transition:width .4s;} | |
| .bar-val{font-size:.5rem;color:var(--sub);min-width:28px;text-align:right;} | |
| /* PROVIDER CARDS */ | |
| .provider-grid{display:grid;grid-template-columns:1fr 1fr 1fr;gap:.7rem;margin-bottom:1rem;} | |
| .pc{background:var(--s1);border:1px solid var(--bd);border-radius:8px;padding:.9rem 1rem; | |
| position:relative;overflow:hidden;} | |
| .pc::before{content:'';position:absolute;top:0;left:0;right:0;height:2px;} | |
| .pc.ki::before{background:var(--p1);}.pc.hf::before{background:var(--p2);}.pc.lc::before{background:var(--p3);} | |
| .pc-name{font-size:.75rem;font-weight:700;color:var(--txt);margin-bottom:.18rem;} | |
| .pc-sub{font-size:.52rem;color:var(--sub);margin-bottom:.5rem;} | |
| .pc-status{display:flex;align-items:center;gap:.3rem;margin-bottom:.4rem;} | |
| .pc-dot{width:7px;height:7px;border-radius:50%;} | |
| .pc-active{color:var(--lo);font-size:.55rem;}.pc-inactive{color:var(--cr);font-size:.55rem;} | |
| .pc-stats{font-size:.54rem;color:var(--sub);line-height:1.7;} | |
| .pc-stat-n{color:var(--txt);} | |
| .pc-toggle{width:100%;margin-top:.6rem;background:var(--s2);border:1px solid var(--bd2); | |
| color:var(--sub);padding:.3rem;font-family:var(--font);font-size:.58rem; | |
| border-radius:4px;cursor:pointer;transition:all .1s;} | |
| .pc-toggle:hover{border-color:var(--acc);color:var(--acc);} | |
| /* MODELS TABLE */ | |
| .models-table{width:100%;border-collapse:collapse;font-size:.6rem;} | |
| .models-table th{text-align:left;padding:.4rem .6rem;font-size:.5rem;color:var(--sub); | |
| text-transform:uppercase;letter-spacing:.1em;border-bottom:1px solid var(--bd);} | |
| .models-table td{padding:.38rem .6rem;border-bottom:1px solid var(--bd);vertical-align:top;} | |
| .models-table tr:hover td{background:var(--s1);} | |
| .m-provider{font-size:.48rem;padding:1px 5px;border-radius:3px;font-weight:700;} | |
| /* TOASTS */ | |
| #toasts{position:fixed;bottom:1rem;right:1rem;z-index:200;display:flex;flex-direction:column;gap:.35rem;} | |
| .tst{background:var(--s1);border:1px solid var(--bd2);border-left:3px solid var(--acc); | |
| padding:.4rem .75rem;font-size:.6rem;border-radius:5px;animation:tin .15s ease;color:var(--txt);} | |
| .tst.err{border-left-color:var(--cr);}.tst.ok{border-left-color:var(--lo);} | |
| @keyframes tin{from{opacity:0;transform:translateX(12px)}to{opacity:1;transform:none}} | |
| #mcp-hint{position:fixed;bottom:1rem;left:1rem;z-index:10;background:var(--s1); | |
| border:1px solid var(--bd2);border-left:3px solid var(--p1);border-radius:5px; | |
| padding:.38rem .75rem;font-size:.52rem;color:var(--sub);} | |
| #mcp-hint code{color:var(--p1);} | |
| </style> | |
| </head> | |
| <body> | |
| <div id="hdr"> | |
| <div> | |
| <div id="logo">NEXUS</div> | |
| <div id="logo-sub">Model Router & Inference Gateway · ki-fusion-labs.de</div> | |
| </div> | |
| <div id="hdr-stats"> | |
| <div class="hs"><span class="hs-n" id="hs-total" style="color:var(--txt)">0</span>REQUESTS</div> | |
| <div class="hs"><span class="provider-dot pulse" style="background:var(--p1)"></span><span class="hs-n" id="hs-kf" style="color:var(--p1)">?</span>KI-FUSION</div> | |
| <div class="hs"><span class="provider-dot" id="dot-hf" style="background:var(--sub)"></span><span class="hs-n" id="hs-hf" style="color:var(--p2)">?</span>HF API</div> | |
| <div class="hs"><span class="provider-dot" id="dot-lc" style="background:var(--sub)"></span><span class="hs-n" id="hs-lc" style="color:var(--p3)">?</span>LOCAL</div> | |
| </div> | |
| </div> | |
| <div id="main"> | |
| <!-- LEFT: Playground --> | |
| <div id="left"> | |
| <div id="left-hdr"> | |
| <span id="left-hdr-title">INFERENCE PLAYGROUND</span> | |
| <span style="font-size:.52rem;color:var(--sub)">OpenAI-compatible</span> | |
| </div> | |
| <div id="left-body"> | |
| <div class="pg-section"> | |
| <div class="pg-label">Prompt</div> | |
| <textarea id="pg-prompt" placeholder="Enter your prompt... The router will automatically classify it and select the best model."></textarea> | |
| </div> | |
| <div class="ctrl-row"> | |
| <div class="ctl"> | |
| <label>Cost Mode</label> | |
| <select id="pg-cost"> | |
| <option value="cheap">Cheap</option> | |
| <option value="balanced" selected>Balanced</option> | |
| <option value="best">Best</option> | |
| </select> | |
| </div> | |
| <div class="ctl"> | |
| <label>Force Provider</label> | |
| <select id="pg-prov"> | |
| <option value="">Auto-route</option> | |
| <option value="ki_fusion">ki_fusion</option> | |
| <option value="hf_api">hf_api</option> | |
| <option value="local_cpu">local_cpu</option> | |
| </select> | |
| </div> | |
| <div class="ctl"> | |
| <label>Max Tokens</label> | |
| <input type="number" id="pg-tokens" value="512" min="64" max="4096" step="64"> | |
| </div> | |
| </div> | |
| <button id="btn-classify">📊 Classify Only (no inference)</button> | |
| <button id="btn-send">⚡ Route & Infer</button> | |
| <div id="route-preview"> | |
| <div class="rp-row" id="rp-badges"></div> | |
| <div class="rp-reason" id="rp-reason"></div> | |
| </div> | |
| <div id="response-box"> | |
| <div id="response-meta"></div> | |
| <div id="response-text"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- RIGHT: Panels --> | |
| <div id="right"> | |
| <div id="tabs"> | |
| <div class="tab on" id="tab-log">ROUTING LOG</div> | |
| <div class="tab" id="tab-providers">PROVIDERS</div> | |
| <div class="tab" id="tab-stats">STATS</div> | |
| <div class="tab" id="tab-models">MODELS</div> | |
| </div> | |
| <div class="panel on" id="panel-log"></div> | |
| <div class="panel" id="panel-providers"></div> | |
| <div class="panel" id="panel-stats"></div> | |
| <div class="panel" id="panel-models"></div> | |
| </div> | |
| </div> | |
| <div id="toasts"></div> | |
| <div id="mcp-hint">MCP: <code>nexus_chat</code> | <code>POST /v1/chat/completions</code> | <code>GET /mcp/sse</code></div> | |
| <script> | |
| var STATS_DATA={}; | |
| var PROVIDER_COLORS={ki_fusion:'#0ea5e9',hf_api:'#7c3aed',local_cpu:'#2ed573'}; | |
| var TASK_COLORS={simple:'#5a5a80',reasoning:'#7c3aed',planning:'#ff6b00', | |
| code:'#0ea5e9',vision:'#ff6b9d',embedding:'#2ed573',unknown:'#3a3a60'}; | |
| var COST_COLORS={cheap:'#2ed573',balanced:'#0ea5e9',best:'#ff9500'}; | |
// HTML-escape a value for safe interpolation into innerHTML.
// null/undefined coerce to ''. '&' is replaced FIRST so the ampersands
// introduced by '&lt;'/'&gt;' are not double-escaped. (The previous
// version replaced each character with itself — a no-op that left every
// esc() call site unescaped.)
function esc(s){return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');}
// Show a transient notification in the bottom-right toast stack.
// type: '' | 'ok' | 'err' (maps onto the .tst modifier classes);
// the toast removes itself after 2.8s.
function toast(msg,type){
  var node=document.createElement('div');
  node.className=type?'tst '+type:'tst';
  node.textContent=msg;
  document.getElementById('toasts').appendChild(node);
  setTimeout(function(){node.remove();},2800);
}
// Tabs
// Activate tab `t` (log / providers / stats / models): highlight its
// header, reveal its panel, and lazily (re)render the data-driven panels.
function showTab(t){
  var names=['log','providers','stats','models'];
  for(var i=0;i<names.length;i++){
    var n=names[i];
    var on=(n==t);
    document.getElementById('tab-'+n).className=on?'tab on':'tab';
    document.getElementById('panel-'+n).className=on?'panel on':'panel';
  }
  if(t=='providers') renderProviders();
  if(t=='stats') renderStats();
  if(t=='models') renderModels();
}
// Wire each tab header to its panel.
['log','providers','stats','models'].forEach(function(name){
  document.getElementById('tab-'+name).addEventListener('click',function(){showTab(name);});
});
// Classify
// "Classify Only": ask the router how it WOULD route the prompt,
// without running inference, and show the decision in the preview box.
document.getElementById('btn-classify').addEventListener('click',function(){
  var prompt=document.getElementById('pg-prompt').value.trim();
  if(!prompt){
    toast('Enter a prompt','err');
    return;
  }
  var payload={prompt:prompt,cost_mode:document.getElementById('pg-cost').value};
  fetch('/api/classify',{
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify(payload)
  })
  .then(function(r){return r.json();})
  .then(function(d){
    showRoutePreview(d);
    toast('Classified: '+d.task+' (complexity '+d.complexity+')');
  })
  .catch(function(){toast('Error','err');});
});
// Render the routing-decision preview: task badge → complexity → provider,
// plus the router's human-readable reason. Accepts both /api/classify
// payloads (selected_provider / complexity) and _nexus payloads (provider
// / complexity_score). Removed the unused COST_COLORS lookup.
function showRoutePreview(d){
  var prev=document.getElementById('route-preview');
  prev.classList.add('show');
  var provider=d.selected_provider||d.provider;
  var tc=TASK_COLORS[d.task]||'#aaa';
  var pc=PROVIDER_COLORS[provider]||'#aaa';
  var badges=document.getElementById('rp-badges');
  badges.innerHTML=
    '<span class="rp-badge" style="background:'+tc+'18;color:'+tc+';border:1px solid '+tc+'33">'+esc(d.task)+'</span>'
    +'<span class="rp-arrow">→</span>'
    +'<span class="rp-badge" style="font-size:.5rem;background:var(--s2);color:var(--sub)">complexity '+esc(String(d.complexity||d.complexity_score||'?'))+'/10</span>'
    +'<span class="rp-arrow">→</span>'
    +'<span class="rp-badge" style="background:'+pc+'18;color:'+pc+';border:1px solid '+pc+'33">'
    +esc(provider)+'</span>';
  document.getElementById('rp-reason').textContent=d.reason||'';
}
// Infer
// "Route & Infer": POST the prompt to the OpenAI-compatible endpoint,
// let NEXUS pick a provider (unless one is forced), then render the
// completion plus routing metadata (from _nexus or the X-Nexus-* headers).
document.getElementById('btn-send').addEventListener('click',function(){
  var prompt=document.getElementById('pg-prompt').value.trim();
  if(!prompt){toast('Enter a prompt','err');return;}
  var cost=document.getElementById('pg-cost').value;
  var prov=document.getElementById('pg-prov').value;
  var tokens=parseInt(document.getElementById('pg-tokens').value)||512;
  var btn=document.getElementById('btn-send');
  btn.disabled=true;btn.textContent='Routing...';
  var t0=Date.now();
  fetch('/v1/chat/completions',{method:'POST',headers:{'Content-Type':'application/json'},
    body:JSON.stringify({
      messages:[{role:'user',content:prompt}],
      max_tokens:tokens,cost_mode:cost,
      provider:prov||undefined  // undefined keys are dropped by JSON.stringify → auto-route
    })})
  .then(function(r){
    // Routing metadata is duplicated in response headers.
    var provider=r.headers.get('X-Nexus-Provider')||'';
    var task=r.headers.get('X-Nexus-Task')||'';
    var reason=r.headers.get('X-Nexus-Reason')||'';
    return r.json().then(function(d){return {data:d,provider:provider,task:task,reason:reason};});
  })
  .then(function(obj){
    var d=obj.data; var ms=Date.now()-t0;
    var nx=d._nexus||{provider:obj.provider,task:obj.task,reason:obj.reason,latency_ms:ms,model:'?'};
    var content='';
    if(d.choices&&d.choices[0]){
      // Guard: some backends return {text} at the choice level instead of
      // {message:{content}} — previously this threw on a missing message.
      var msg=d.choices[0].message||{};
      content=msg.content||msg.text||d.choices[0].text||'';
    }else if(d.error){
      // Surface API-level errors instead of rendering an empty response.
      content='[error] '+(d.error.message||d.error);
    }
    showResponse(content,nx);
    showRoutePreview({task:nx.task,complexity:nx.complexity||'?',
      selected_provider:nx.provider,reason:nx.reason});
    loadLog();loadHeaderStats();
  })
  .catch(function(e){toast('Error: '+e.message,'err');})
  .finally(function(){btn.disabled=false;btn.innerHTML='⚡ Route & Infer';});
});
// Paint the completion text plus its metadata badges
// (provider, task, latency, short model name) into the response box.
function showResponse(content,nx){
  var box=document.getElementById('response-box');
  box.classList.add('show');
  var pc=PROVIDER_COLORS[nx.provider]||'#aaa';
  var tc=TASK_COLORS[nx.task]||'#aaa';
  var badges=[];
  badges.push('<span class="r-badge" style="background:'+pc+'18;color:'+pc+';border:1px solid '+pc+'33">'+esc(nx.provider)+'</span>');
  badges.push('<span class="r-badge" style="background:'+tc+'18;color:'+tc+';border:1px solid '+tc+'33">'+esc(nx.task)+'</span>');
  badges.push('<span class="r-badge" style="background:var(--s2);color:var(--sub);font-size:.46rem">'+esc(String(nx.latency_ms||'?'))+'ms</span>');
  if(nx.model){
    // Show only the final path segment of namespaced model ids.
    badges.push('<span class="r-badge" style="background:var(--dim);color:var(--sub);font-size:.44rem;font-family:monospace">'+esc((nx.model||'').split('/').pop())+'</span>');
  }
  document.getElementById('response-meta').innerHTML=badges.join('');
  document.getElementById('response-text').textContent=content;
}
// Routing Log
// Refresh the cached stats snapshot, the routing-log panel,
// and the header counters in one /api/stats round trip.
function loadLog(){
  fetch('/api/stats')
    .then(function(r){return r.json();})
    .then(function(s){
      STATS_DATA=s;
      renderLog(s.recent||[]);
      loadHeaderStats(s);
    })
    .catch(function(){});
}
// Update the header request counters and provider health dots.
// Called with a stats payload when the caller already has one;
// fetches a fresh snapshot otherwise.
function loadHeaderStats(s){
  if(!s){fetch('/api/stats').then(function(r){return r.json();}).then(loadHeaderStats);return;}
  document.getElementById('hs-total').textContent=s.total_requests||0;
  var bp=s.by_provider||{};
  document.getElementById('hs-kf').textContent=(bp.ki_fusion||{}).ok||0;
  document.getElementById('hs-hf').textContent=(bp.hf_api||{}).ok||0;
  document.getElementById('hs-lc').textContent=(bp.local_cpu||{}).ok||0;
  // Health dots
  fetch('/api/health').then(function(r){return r.json();}).then(function(h){
    var hf=h.hf_api||{};
    var lc=h.local_cpu||{};
    var dotHf=document.getElementById('dot-hf');
    var dotLc=document.getElementById('dot-lc');
    dotHf.style.background=hf.ok?'var(--p2)':'var(--sub)';
    dotLc.style.background=lc.ok?'var(--p3)':'var(--sub)';
    // toggle (not add) so a dot stops pulsing when its provider goes offline.
    dotHf.classList.toggle('pulse',!!hf.ok);
    dotLc.classList.toggle('pulse',!!lc.ok);
  }).catch(function(){});
}
// Render the routing-decision log entries (the `recent` array from
// /api/stats) into the ROUTING LOG panel, newest payload wins.
// Shows an empty-state hint when nothing has been routed yet.
// Removed the unused COST_COLORS lookup; ms/tokens are now escaped
// like every other interpolated field.
function renderLog(entries){
  var panel=document.getElementById('panel-log');
  if(!entries.length){
    panel.innerHTML='<div style="text-align:center;padding:2rem;font-size:.6rem;color:var(--sub)">No routing decisions yet<br><span style="font-size:.52rem;opacity:.5">Use the playground to send requests</span></div>';
    return;
  }
  panel.innerHTML=entries.map(function(e){
    var pc=PROVIDER_COLORS[e.provider]||'#aaa';
    var tc=TASK_COLORS[e.task]||'#aaa';
    return '<div class="log-entry">'
      +'<div class="le-top">'
      +'<span class="le-id">'+esc(e.id)+'</span>'
      +'<span class="le-provider" style="background:'+pc+'18;color:'+pc+';border:1px solid '+pc+'33">'+esc(e.provider)+'</span>'
      +'<span class="le-task" style="background:'+tc+'18;color:'+tc+';border:1px solid '+tc+'33">'+esc(e.task)+'</span>'
      +'<span class="le-status '+(e.ok?'le-ok':'le-fail')+'">'+(e.ok?'OK':'FAIL')+'</span>'
      +'<span class="le-ms">'+esc(String(e.ms))+'ms'+(e.tokens?' · '+esc(String(e.tokens))+'tok':'')+'</span>'
      +'</div>'
      +'<div class="le-reason">'+esc(e.reason||'')+'</div>'
      +'<div class="le-model">'+esc(e.model||'')+'</div>'
      +'</div>';
  }).join('');
}
// Render the three provider cards (identity, live health, per-provider
// stats, enable/disable toggle) into the PROVIDERS panel.
function renderProviders(){
fetch('/api/health').then(function(r){return r.json();}).then(function(h){
// Per-provider counters from the last /api/stats snapshot (may be stale/empty).
var s=STATS_DATA.by_provider||{};
var panel=document.getElementById('panel-providers');
// Static card metadata; `key` must match the router's provider ids.
var providers=[
{key:'ki_fusion',cls:'ki',name:'KI-FUSION LABS',sub:'LM Studio via PHP proxy',
url:'ki-fusion-labs.de/v1',desc:'Your local RTX 5090 — highest quality, zero cost'},
{key:'hf_api',cls:'hf',name:'HUGGINGFACE API',sub:'Serverless Inference (HF_TOKEN)',
url:'api-inference.huggingface.co',desc:'Free tier with rate limits; Llama, Qwen, Mistral, Phi'},
{key:'local_cpu',cls:'lc',name:'LOCAL CPU',sub:'Qwen2.5-0.5B via transformers',
url:'in-process',desc:'Always-on fallback; slow but zero-latency network; no API key needed'},
];
panel.innerHTML='<div class="provider-grid">'+providers.map(function(p){
var health=h[p.key]||{};
var stat=s[p.key]||{ok:0,fail:0,avg_ms:0};
var ok=health.ok;
return '<div class="pc '+p.cls+'">'
+'<div class="pc-name">'+esc(p.name)+'</div>'
+'<div class="pc-sub">'+esc(p.sub)+'</div>'
+'<div class="pc-status"><div class="pc-dot '+(ok?'pulse':'')+'" style="background:'+(ok?PROVIDER_COLORS[p.key]:'var(--cr)')+'"></div>'
+'<span class="'+(ok?'pc-active':'pc-inactive')+'">'+(ok?'ACTIVE':'OFFLINE')+'</span></div>'
+'<div class="pc-stats">'
+'Requests: <span class="pc-stat-n">'+(stat.ok||0)+'</span> ok / <span style="color:var(--cr)">'+(stat.fail||0)+'</span> fail<br>'
+'Avg latency: <span class="pc-stat-n">'+(stat.avg_ms||'—')+'</span> ms<br>'
+'URL: <span style="font-size:.48rem;opacity:.5">'+esc(p.url)+'</span><br>'
+'<span style="font-size:.52rem;opacity:.6;line-height:1.5">'+esc(p.desc)+'</span>'
+'</div>'
+'<button class="pc-toggle" data-key="'+p.key+'">'+(ok?'Disable':'Enable')+'</button>'
+'</div>';
}).join('')+'</div>';
// Toggle buttons re-render the whole panel (with fresh health) on success.
panel.querySelectorAll('.pc-toggle').forEach(function(btn){
btn.addEventListener('click',function(){
var key=this.getAttribute('data-key');
// NOTE(review): `enabled` is derived from the *health* flag captured in the
// closure, not a stored enabled/disabled state — verify against the
// /api/providers/{key}/toggle endpoint's expected semantics.
var h_entry=h[key]||{};
fetch('/api/providers/'+key+'/toggle',{method:'POST',
headers:{'Content-Type':'application/json'},
body:JSON.stringify({enabled:!h_entry.ok})})
.then(function(){renderProviders();toast('Provider updated','ok');})
.catch(function(){toast('Error','err');});
});
});
}).catch(function(){});
}
// Render aggregate stats: total, per-task / per-cost bar charts, and one
// card per provider (success rate, avg latency, token count).
// `attempt` (optional, backward-compatible) bounds the startup retry loop:
// the previous version re-armed setTimeout(...,500) unconditionally, so an
// idle server was polled forever and the panel stayed blank. Also removed
// the unused `maxProv` local.
function renderStats(attempt){
  var s=STATS_DATA;
  if(!s||!s.total_requests){
    // Snapshot not loaded yet: trigger a refresh and retry a few times,
    // then settle on a placeholder (the 8s poller keeps STATS_DATA fresh).
    loadLog();
    var tries=attempt||0;
    if(tries<6){
      setTimeout(function(){renderStats(tries+1);},500);
    }else{
      document.getElementById('panel-stats').innerHTML='<div style="text-align:center;padding:2rem;font-size:.6rem;color:var(--sub)">No requests recorded yet</div>';
    }
    return;
  }
  var panel=document.getElementById('panel-stats');
  var bp=s.by_provider||{};
  var bt=s.by_task||{};
  var bc=s.by_cost||{};
  var total=s.total_requests||1;
  // One labelled bar: label | track filled to val/max percent | raw value.
  function barRow(label,val,max,col){
    var pct=max>0?Math.round(val/max*100):0;
    return '<div class="bar-row"><span class="bar-label">'+esc(label)+'</span>'
      +'<div class="bar-track"><div class="bar-fill" style="width:'+pct+'%;background:'+col+'"></div></div>'
      +'<span class="bar-val">'+val+'</span></div>';
  }
  var maxTask=Math.max(1,...Object.values(bt));
  panel.innerHTML=
    '<div class="stats-grid">'
    +'<div class="stat-card"><div class="sc-title">Total Requests</div>'
    +'<div class="sc-n">'+total+'</div></div>'
    +'<div class="stat-card"><div class="sc-title">By Task</div>'
    +'<div class="bar-container">'
    +Object.entries(bt).filter(function(e){return e[1]>0;}).map(function(e){
      return barRow(e[0],e[1],maxTask,TASK_COLORS[e[0]]||'#aaa');}).join('')
    +'</div></div>'
    +'<div class="stat-card"><div class="sc-title">By Cost Mode</div>'
    +'<div class="bar-container">'
    +Object.entries(bc).filter(function(e){return e[1]>0;}).map(function(e){
      return barRow(e[0],e[1],total,COST_COLORS[e[0]]||'#aaa');}).join('')
    +'</div></div>'
    +'</div>'
    +'<div class="stats-grid">'
    +['ki_fusion','hf_api','local_cpu'].map(function(k){
      var d=bp[k]||{ok:0,fail:0,avg_ms:0,tokens:0};
      var pc=PROVIDER_COLORS[k]||'#aaa';
      var total_p=((d.ok||0)+(d.fail||0))||1;  // avoid divide-by-zero
      var sr=Math.round((d.ok||0)/total_p*100);
      return '<div class="stat-card"><div class="sc-title" style="color:'+pc+'">'+k+'</div>'
        +'<div class="sc-n" style="color:'+pc+'">'+(d.ok||0)+'</div>'
        +'<div class="sc-sub">success rate: '+sr+'%</div>'
        +'<div class="sc-sub">avg latency: '+(d.avg_ms||'—')+'ms</div>'
        +'<div class="sc-sub">tokens: '+(d.tokens||0)+'</div>'
        +'</div>';
    }).join('')
    +'</div>';
}
// Populate the MODELS panel from /v1/models as a table of
// model id / provider badge / description.
function renderModels(){
  fetch('/v1/models')
    .then(function(r){return r.json();})
    .then(function(d){
      var rows=(d.data||[]).map(function(m){
        var owner=m.owned_by||'';
        var col=owner=='ki-fusion-labs'?'var(--p1)':owner=='huggingface'?'var(--p2)':owner=='local'?'var(--p3)':'var(--sub)';
        return '<tr><td style="font-family:monospace;font-size:.58rem">'+esc(m.id)+'</td>'
          +'<td><span class="m-provider" style="background:'+col+'18;color:'+col+';border:1px solid '+col+'33">'+esc(owner)+'</span></td>'
          +'<td style="font-size:.56rem;color:var(--sub)">'+esc(m.description||'')+'</td></tr>';
      });
      document.getElementById('panel-models').innerHTML=
        '<table class="models-table"><thead><tr>'
        +'<th>Model ID</th><th>Provider</th><th>Description</th></tr></thead><tbody>'
        +rows.join('')
        +'</tbody></table>';
    })
    .catch(function(){toast('Error loading models','err');});
}
// Ctrl/Cmd+Enter while the prompt textarea is focused triggers Route & Infer.
document.addEventListener('keydown',function(e){
  var cmdEnter=(e.ctrlKey||e.metaKey)&&e.key=='Enter';
  if(!cmdEnter) return;
  var active=document.activeElement;
  if(active&&active.id=='pg-prompt'){
    document.getElementById('btn-send').click();
  }
});
// Initial load, then poll every 8s to keep log, stats and header fresh.
loadLog();
setInterval(loadLog,8000);
| </script> | |
| </body> | |
| </html>""" |