sharktide committed on
Commit
48286af
·
verified ·
1 Parent(s): a5bd7b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -16
app.py CHANGED
@@ -18,11 +18,8 @@ app.add_middleware(
18
 
19
  OLLAMA_LIBRARY_URL = "https://ollama.com/library"
20
 
21
- # -----------------------------
22
- # RATE LIMITING (25 req/day/IP)
23
- # -----------------------------
24
  RATE_LIMIT = 25
25
- WINDOW_SECONDS = 60 * 60 * 24 # 24 hours
26
  ip_store = {} # { ip: { "count": int, "reset": timestamp } }
27
 
28
 
@@ -46,12 +43,65 @@ def check_rate_limit(ip: str):
46
 
47
  entry["count"] += 1
48
 
49
-
50
- # -----------------------------
51
- # IMAGE GENERATION ENDPOINT
52
- # -----------------------------
53
- PKEY = os.getenv("POLLINATIONS_KEY", "") # ensure this is set in your environment
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  @app.get("/genimg/{prompt}")
57
  async def generate_image(prompt: str, request: Request):
@@ -69,17 +119,11 @@ async def generate_image(prompt: str, request: Request):
69
  detail=f"Pollinations error: {response.status_code}"
70
  )
71
 
72
- # Pollinations always returns JPEG
73
  return Response(
74
  content=response.content,
75
  media_type="image/jpeg"
76
  )
77
 
78
-
79
-
80
- # -----------------------------
81
- # EXISTING MODELS SCRAPER
82
- # -----------------------------
83
  @app.get("/models")
84
  async def get_models() -> List[Dict]:
85
  async with httpx.AsyncClient() as client:
@@ -110,3 +154,77 @@ async def get_models() -> List[Dict]:
110
  })
111
 
112
  return models
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  OLLAMA_LIBRARY_URL = "https://ollama.com/library"
20
 
 
 
 
21
  RATE_LIMIT = 25
22
+ WINDOW_SECONDS = 60 * 60 * 24
23
  ip_store = {} # { ip: { "count": int, "reset": timestamp } }
24
 
25
 
 
43
 
44
  entry["count"] += 1
45
 
46
# Pollinations API key, read from the environment; empty string when unset.
PKEY = os.getenv("POLLINATIONS_KEY", "")
48
# Per-IP running total of messages seen by the text-generation endpoint.
# NOTE(review): this grows without bound and never resets, unlike ip_store's
# windowed entries — confirm whether counts should expire too.
message_counts = {}

def increment_message_count(ip: str):
    """Bump and return the lifetime message count for *ip*."""
    count = message_counts.get(ip, 0) + 1
    message_counts[ip] = count
    return count
53
+
54
# Groq-hosted models that support tool / function calling.
GROQ_TOOL_MODELS = [
    "openai/gpt-oss-120b",
    "openai/gpt-oss-20b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "qwen/qwen3-32b",
    "moonshotai/kimi-k2-instruct",
]

# Groq-hosted models for plain (non-tool) chat completions.
GROQ_NORMAL_MODELS = [
    "llama-3.1-8b-instant",
    "llama-3.3-70b-versatile",
    "meta-llama/llama-4-maverick-17b-128e-instruct",
    "meta-llama/llama-guard-4-12b",
    "openai/gpt-oss-safeguard-20b",
    "qwen/qwen3-32b",
]

# Models served by Cerebras.
# NOTE(review): none of these three lists is referenced by the code visible
# here — presumably consumed elsewhere (e.g. a model-listing endpoint);
# confirm before removing.
CEREBRAS_MODELS = [
    "gpt-oss-120b",
    "llama3.1-8b",
    "qwen-3-235b-a22b-instruct-2507",
    "zai-glm-4.7",
]
77
+
78
def detect_tool_use(messages: list) -> bool:
    """
    Return True when any message in the conversation looks tool-related.

    A message qualifies if it carries a "tool_calls" key or a legacy
    "function_call"-style key (key presence only; values are not inspected).
    """
    return any(
        ("tool_calls" in entry) or ("function_call" in entry)
        for entry in messages
    )


def choose_model(messages: list, msg_count: int):
    """
    Pick a (model, provider) pair for a conversation.

    Tool-using conversations always route to Groq; once a caller has sent
    more than 20 messages, the larger models are selected.
    """
    if detect_tool_use(messages):
        # Groq hosts the tool-capable gpt-oss models.
        model = "openai/gpt-oss-120b" if msg_count > 20 else "openai/gpt-oss-20b"
        return model, "groq"

    # Plain chat: long sessions move to Cerebras' larger model.
    if msg_count > 20:
        return "gpt-oss-120b", "cerebras"

    return "llama-3.1-8b-instant", "groq"
105
 
106
  @app.get("/genimg/{prompt}")
107
  async def generate_image(prompt: str, request: Request):
 
119
  detail=f"Pollinations error: {response.status_code}"
120
  )
121
 
 
122
  return Response(
123
  content=response.content,
124
  media_type="image/jpeg"
125
  )
126
 
 
 
 
 
 
127
  @app.get("/models")
128
  async def get_models() -> List[Dict]:
129
  async with httpx.AsyncClient() as client:
 
154
  })
155
 
156
  return models
157
+
158
async def _forward_openai_chat(url: str, api_key: str, body: dict):
    """POST *body* to an OpenAI-compatible chat endpoint and relay the reply.

    Raises HTTPException(502) when the upstream answers with a non-JSON
    payload (e.g. an HTML error page) — previously the bare ``r.json()``
    call let the decode error escape as an unhandled 500.
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    async with httpx.AsyncClient(timeout=None) as client:
        r = await client.post(url, json=body, headers=headers)
    try:
        payload = r.json()
    except ValueError:
        raise HTTPException(502, "Upstream returned a non-JSON response")
    return JSONResponse(status_code=r.status_code, content=payload)


@app.post("/gen")
async def generate_text(request: Request):
    """Route a chat-completion request to Groq or Cerebras and proxy it.

    Routing rules:
      * requests that declare tools (a non-empty ``tools`` list, or a
        ``tool_choice`` other than None/"none") always go to Groq's
        tool-capable gpt-oss models;
      * otherwise, callers past 20 messages (tracked per IP) are upgraded
        to the larger Cerebras model; short sessions use Groq's
        llama-3.1-8b-instant.

    Raises 400 when ``messages`` is missing/empty and 500 when the chosen
    provider's API key is not configured.

    NOTE(review): unlike the image endpoint, check_rate_limit() is not
    applied here — confirm whether that is intentional.
    """
    body = await request.json()

    messages = body.get("messages", [])
    if not isinstance(messages, list) or len(messages) == 0:
        raise HTTPException(400, "messages[] is required")

    ip = request.client.host
    msg_count = increment_message_count(ip)

    uses_tools = (
        "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
    ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])

    if uses_tools:
        provider = "groq"
        chosen_model = "openai/gpt-oss-120b" if msg_count > 20 else "openai/gpt-oss-20b"
    elif msg_count > 20:
        provider = "cerebras"
        chosen_model = "gpt-oss-120b"
    else:
        provider = "groq"
        chosen_model = "llama-3.1-8b-instant"

    # Any caller-supplied "model" is deliberately overridden — this endpoint
    # owns model selection (the previous `requested_model` local was unused).
    body["model"] = chosen_model

    if provider == "groq":
        api_key = os.getenv("GROQ_KEY", "")
        if not api_key:
            raise HTTPException(500, "Missing GROQ_KEY")
        return await _forward_openai_chat(
            "https://api.groq.com/openai/v1/chat/completions", api_key, body
        )

    # provider == "cerebras" — the only remaining branch, so the old
    # unreachable "Unknown provider routing error" tail is gone.
    api_key = os.getenv("CER_KEY", "")
    if not api_key:
        raise HTTPException(500, "Missing CER_KEY")
    return await _forward_openai_chat(
        "https://api.cerebras.ai/v1/chat/completions", api_key, body
    )