Spaces:

sharktide
/

lightning

Running

App Files Files Community

sharktide committed on Apr 25

Commit

6a3237f

verified ·

1 Parent(s): adba311

Update gen.py

Browse files

Files changed (1) hide show

gen.py +349 -1

gen.py CHANGED Viewed

@@ -941,4 +941,352 @@ def return_models_openai():
           "owned_by": "inferenceport-ai"
         }
       ]
-    }

           "owned_by": "inferenceport-ai"
         }
       ]
+    }
+from uuid import uuid4
+from time import time
+from typing import Any, Dict, List, Optional
+import json
+import os
+import random
+import httpx
+from fastapi import Request, HTTPException, Header
+from fastapi.responses import JSONResponse, StreamingResponse
+def _resp_id(prefix: str) -> str:
+    """Return ``prefix``, an underscore, and the hex form of a freshly generated UUID4."""
+    unique_hex = uuid4().hex
+    return "{}_{}".format(prefix, unique_hex)
+def _resp_ts() -> int:
+    """Return the current Unix time truncated to whole seconds."""
+    now = time()
+    return int(now)
+def _content_to_text(content: Any) -> str:
+    """Flatten a message ``content`` value into a single plain string.
+
+    A string is returned unchanged. For a list, the ``text`` of every dict item whose ``type`` is
+    ``input_text``, ``output_text`` or ``text`` is concatenated with no separator; items that are
+    not dicts, have a different ``type``, or carry a non-string ``text`` are skipped. Any other
+    kind of value yields an empty string.
+    """
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return ""
+    text_kinds = ("input_text", "output_text", "text")
+    return "".join(
+        piece["text"]
+        for piece in content
+        if isinstance(piece, dict)
+        and piece.get("type") in text_kinds
+        and isinstance(piece.get("text"), str)
+    )
+def _responses_input_to_messages(input_data: Any, instructions: Optional[str] = None) -> List[Dict[str, Any]]:
+    """Convert a Responses-style ``input`` (plus optional ``instructions``) into chat messages.
+
+    Non-empty ``instructions`` become a leading ``developer`` message. A string input becomes one
+    ``user`` message. For a list input, string items become ``user`` messages and dict items are
+    flattened with ``_content_to_text`` under their ``role`` (default ``user``); dict items whose
+    flattened text is empty, and items of any other type, are dropped. Any other input type adds
+    nothing.
+    """
+    converted: List[Dict[str, Any]] = (
+        [{"role": "developer", "content": instructions}] if instructions else []
+    )
+    if isinstance(input_data, str):
+        converted.append({"role": "user", "content": input_data})
+    elif isinstance(input_data, list):
+        for entry in input_data:
+            if isinstance(entry, str):
+                converted.append({"role": "user", "content": entry})
+            elif isinstance(entry, dict):
+                flattened = _content_to_text(entry.get("content", ""))
+                if flattened:
+                    converted.append({"role": entry.get("role", "user"), "content": flattened})
+    return converted
+def _openai_responses_payload(model: str, text: str, input_tokens: int = 0, output_tokens: int = 0) -> Dict[str, Any]:
+    """Build a completed ``response`` object wrapping ``text`` as a single assistant message.
+
+    The response and the message each get a fresh id from ``_resp_id``; ``created_at`` and
+    ``completed_at`` are both read from ``_resp_ts``. ``usage.total_tokens`` is the sum of the
+    two token counts passed in.
+    """
+    payload: Dict[str, Any] = {"id": _resp_id("resp"), "object": "response"}
+    payload["created_at"] = _resp_ts()
+    payload["status"] = "completed"
+    payload["completed_at"] = _resp_ts()
+    payload.update(
+        error=None,
+        incomplete_details=None,
+        instructions=None,
+        max_output_tokens=None,
+        model=model,
+    )
+    text_part = {"type": "output_text", "text": text, "annotations": []}
+    assistant_message = {
+        "id": _resp_id("msg"),
+        "type": "message",
+        "role": "assistant",
+        "status": "completed",
+        "content": [text_part],
+    }
+    payload["output"] = [assistant_message]
+    payload["output_text"] = text
+    payload["usage"] = {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": input_tokens + output_tokens,
+    }
+    return payload
+# Upstream chat-completions endpoint, API-key environment variable (comma-separated keys) and
+# missing-key error text for each provider label produced by the routing step.
+_CHAT_PROVIDERS: Dict[str, Dict[str, str]] = {
+    "groq": {
+        "url": "https://api.groq.com/openai/v1/chat/completions",
+        "env": "GROQKEY",
+        "missing": "Missing GROQKEYs",
+    },
+    "navy vision": {
+        "url": "https://api.navy/v1/chat/completions",
+        "env": "NAVYKEY",
+        "missing": "Missing NAVYKEYs",
+    },
+    "navy": {
+        "url": "https://api.navy/v1/chat/completions",
+        "env": "NAVYTEXTONLY",
+        "missing": "Missing NAVY TEXT ONLY keys",
+    },
+}
+def _route_text_model(score: int, usestools: bool, codepresent: bool, codeheavy: bool) -> tuple:
+    """Pick ``(model, provider)`` for an image-free prompt from its score and feature flags."""
+    if usestools:
+        if score >= 6:
+            return "nemotron-3-super", "navy"
+        if score >= 4:
+            return "openai/gpt-oss-120b", "groq"
+        return "openai/gpt-oss-20b", "groq"
+    if codepresent:
+        if codeheavy and score >= 6:
+            return "o3-mini", "navy"
+        if score >= 4:
+            return "llama-3.3-70b-versatile", "groq"
+        return "llama-3.1-8b-instant", "groq"
+    # The higher threshold must be tested first: with ">= 4" ahead of it, the ">= 6" branch
+    # could never be taken.
+    if score >= 6:
+        return "sonar", "navy"
+    if score >= 4:
+        return "meta-llama/llama-4-scout-17b-16e-instruct", "groq"
+    return "llama-3.1-8b-instant", "groq"
+async def _post_chat_completion(provider: str, model: str, messages: List[Dict[str, Any]]) -> Any:
+    """POST a non-streaming chat completion to ``provider`` and return the decoded JSON body."""
+    target = _CHAT_PROVIDERS.get(provider)
+    if target is None:
+        raise HTTPException(status_code=500, detail="Unknown provider routing error")
+    rawkeys = os.getenv(target["env"]) or ""
+    keys = [k.strip() for k in rawkeys.split(",") if k.strip()]
+    if not keys:
+        raise HTTPException(status_code=500, detail=target["missing"])
+    # One of the configured keys is picked at random for each request.
+    apikey = random.choice(keys)
+    headers = {"Authorization": f"Bearer {apikey}", "Content-Type": "application/json"}
+    payload = {"model": model, "messages": messages, "stream": False}
+    # NOTE(review): timeout=None disables every httpx timeout, so a stalled upstream blocks this
+    # request indefinitely. Kept as in the original; confirm this is intended.
+    async with httpx.AsyncClient(timeout=None) as client:
+        r = await client.post(target["url"], json=payload, headers=headers)
+    if r.status_code != 200:
+        # Forward the upstream status with a truncated copy of its body.
+        raise HTTPException(status_code=r.status_code, detail=r.text[:1000])
+    try:
+        return r.json()
+    except ValueError as exc:
+        raise HTTPException(status_code=502, detail="Upstream returned a non-JSON response") from exc
+async def _generate_text_from_messages(
+    request: Request,
+    messages: List[Dict[str, Any]],
+    authorization: Optional[str],
+    xclientid: Optional[str],
+) -> Dict[str, Any]:
+    """Score the prompt, choose a model and provider, and return the generated text.
+
+    The score (capped at 10) adds 3 for long context, 3 for math-heavy, 2 for structured task,
+    2 for code present, 1 for multiple questions, plus 1 per ``REASONINGKEYWORDS`` entry found in
+    the prompt text. Messages containing images always go to ``gpt-4o-mini`` on "navy vision".
+    Otherwise the model comes from ``_route_text_model``, and a Groq choice is swapped for
+    ``gpt-4o-mini`` on "navy" when the prompt exceeds ``MAXGROQPROMPTCHARS`` or
+    ``MAXGROQPROMPTBYTES``. ``checkchatratelimit`` is awaited before the upstream call.
+
+    Returns:
+        A dict with ``text`` (empty string when the reply has no usable content), ``model``,
+        ``provider`` and ``raw`` (the decoded upstream JSON).
+
+    Raises:
+        HTTPException: 500 when no API key is configured or the provider is unknown, the
+            upstream status code when it is not 200, 502 when a 200 reply is not JSON.
+            Whatever ``checkchatratelimit`` raises also propagates.
+    """
+    totalchars, totalbytes = calculatemessagessize(messages)
+    prompttext = extractusertext(messages)
+    usestools = False  # hard-coded, so the tool-routing branch is never taken from this function
+    longcontext = islongcontext(messages)
+    codepresent = containscode(prompttext)
+    mathheavy = ismathheavyprompt(prompttext)
+    structuredtask = isstructuredtask(prompttext)
+    multiq = multiplequestions(prompttext)
+    codeheavy = iscodeheavyprompt(prompttext, codepresent, longcontext)
+    score = 0
+    for flag, weight in ((longcontext, 3), (mathheavy, 3), (structuredtask, 2), (codepresent, 2), (multiq, 1)):
+        if flag:
+            score += weight
+    score += sum(1 for kw in REASONINGKEYWORDS if kw in prompttext)
+    score = min(score, 10)
+    if containsimages(messages):
+        chosenmodel, provider = "gpt-4o-mini", "navy vision"
+    else:
+        chosenmodel, provider = _route_text_model(score, usestools, codepresent, codeheavy)
+        if provider == "groq" and (totalchars > MAXGROQPROMPTCHARS or totalbytes > MAXGROQPROMPTBYTES):
+            chosenmodel, provider = "gpt-4o-mini", "navy"
+    await checkchatratelimit(request, authorization, xclientid)
+    data = await _post_chat_completion(provider, chosenmodel, messages)
+    try:
+        text = data["choices"][0]["message"]["content"] or ""
+    except (KeyError, IndexError, TypeError):
+        # A reply without the expected choices/message shape yields empty text instead of failing.
+        text = ""
+    return {"text": text, "model": chosenmodel, "provider": provider, "raw": data}
+@router.post("/responses")
+async def create_responses(
+    request: Request,
+    authorization: Optional[str] = Header(None),
+    xclientid: Optional[str] = Header(None),
+):
+    """Handle ``POST /responses``: generate text for the request and return it as a response object.
+
+    The JSON body must be an object with ``model`` and ``input``; ``instructions`` is optional.
+    When ``stream`` is exactly ``false`` a single JSON response is returned. Otherwise the reply
+    is a server-sent-event stream: ``response.created``, then ``response.output_text.delta``
+    events carrying 64-character slices of the finished text, then ``response.completed`` and a
+    final ``[DONE]`` marker. The upstream call itself is not streamed; the text is sliced after
+    it has been fully generated.
+
+    Raises:
+        HTTPException: 400 for an invalid or non-object JSON body, a missing ``model`` or
+            ``input``, or input that produces no messages. In non-streaming mode, errors from
+            generation propagate; in streaming mode they are sent as a ``response.error`` event.
+    """
+    try:
+        body = await request.json()
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail="request body must be valid JSON") from exc
+    if not isinstance(body, dict):
+        raise HTTPException(status_code=400, detail="request body must be a JSON object")
+    model = body.get("model")
+    input_data = body.get("input")
+    instructions = body.get("instructions")
+    # NOTE(review): streaming is the default here and only a literal JSON false turns it off;
+    # confirm that this default is intended.
+    stream = body.get("stream", True)
+    if not model:
+        raise HTTPException(status_code=400, detail="model is required")
+    if input_data is None:
+        raise HTTPException(status_code=400, detail="input is required")
+    messages = _responses_input_to_messages(input_data, instructions=instructions)
+    if not messages:
+        raise HTTPException(status_code=400, detail="input could not be parsed")
+    if stream is False:
+        result = await _generate_text_from_messages(
+            request=request,
+            messages=messages,
+            authorization=authorization,
+            xclientid=xclientid,
+        )
+        if "text" not in result:
+            raise HTTPException(status_code=500, detail="upstream generation failed")
+        return JSONResponse(content=_openai_responses_payload(model, result["text"]))
+    def _sse(event: Dict[str, Any]) -> str:
+        """Serialize one event as a server-sent-event ``data:`` frame."""
+        return f"data: {json.dumps(event)}\n\n"
+    async def event_stream():
+        response_id = _resp_id("resp")
+        yield _sse({
+            "type": "response.created",
+            "response": {
+                "id": response_id,
+                "object": "response",
+                "created_at": _resp_ts(),
+                "status": "in_progress",
+                "model": model
+            }
+        })
+        try:
+            result = await _generate_text_from_messages(
+                request=request,
+                messages=messages,
+                authorization=authorization,
+                xclientid=xclientid,
+            )
+        except HTTPException as exc:
+            # The response has already started, so the failure can no longer become an HTTP
+            # error status; report it to the client as an error event instead.
+            detail = exc.detail if isinstance(exc.detail, str) else json.dumps(exc.detail, default=str)
+            result = {"error": {"message": detail, "code": exc.status_code}}
+        if "text" not in result:
+            yield _sse({
+                "type": "response.error",
+                "error": result.get("error", {"message": "upstream error"})
+            })
+            yield "data: [DONE]\n\n"
+            return
+        text = result["text"]
+        if text:
+            chunk_size = 64
+            for start in range(0, len(text), chunk_size):
+                yield _sse({
+                    "type": "response.output_text.delta",
+                    "response_id": response_id,
+                    "delta": text[start:start + chunk_size]
+                })
+        yield _sse({
+            "type": "response.completed",
+            "response": {
+                "id": response_id,
+                "object": "response",
+                "created_at": _resp_ts(),
+                "status": "completed",
+                "completed_at": _resp_ts(),
+                "model": model,
+                "output_text": text,
+                "output": [
+                    {
+                        "id": _resp_id("msg"),
+                        "type": "message",
+                        "role": "assistant",
+                        "status": "completed",
+                        "content": [
+                            {
+                                "type": "output_text",
+                                "text": text,
+                                "annotations": []
+                            }
+                        ]
+                    }
+                ],
+                "usage": {
+                    "input_tokens": 0,
+                    "output_tokens": 0,
+                    "total_tokens": 0
+                }
+            }
+        })
+        yield "data: [DONE]\n\n"
+    return StreamingResponse(
+        event_stream(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )