Update app.py

app.py CHANGED

@@ -7,7 +7,7 @@ import httpx
 from bs4 import BeautifulSoup
 from typing import List, Dict
 import asyncio
-
+import re
 app = FastAPI()
 
 app.add_middleware(
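
Note: the new import re is required by the helpers added below; is_math_heavy and is_complex_reasoning both call re.search.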

@@ -23,6 +23,170 @@ RATE_LIMIT = 25
 WINDOW_SECONDS = 60 * 60 * 24
 ip_store = {} # { ip: { "count": int, "reset": timestamp } }
 
+REASONING_KEYWORDS = [
+    # explicit reasoning requests
+    "prove", "demonstrate", "derive", "justify", "verify",
+    "show that", "walk through", "step by step", "reason through",
+    "chain of reasoning", "rigorous", "formal proof",
+
+    # analysis/comparison
+    "analyze", "analysis of", "compare and contrast",
+    "evaluate", "critically assess", "explain why",
+    "explain how", "what causes", "implications of",
+
+    # problem solving
+    "solve", "solution to", "how would you approach",
+    "strategy for", "optimize", "algorithm for",
+
+    # technical domains
+    "theorem", "lemma", "corollary",
+    "complexity analysis", "big o", "time complexity",
+    "mathematical", "statistical", "probabilistic",
+    "model the", "simulate",
+]
+
+CODE_KEYWORDS = [
+    "await", "async", "print(", "console.log(",
+    "code", ".ts", ".js", ".py", ".repy", ".rb",
+    "gnu", "gcc", "clang", "clang++", "program",
+    "coding"
+]
+
+CREATIVE_KEYWORDS = [
+    # cinematic cues
+    "cinematic", "film still", "movie scene",
+    "epic", "dramatic lighting", "moody lighting",
+    "volumetric lighting", "depth of field",
+    "anamorphic lens", "8k", "4k",
+
+    # art styles
+    "concept art", "digital painting",
+    "fantasy art", "sci-fi", "mythical",
+    "cyberpunk", "steampunk",
+    "baroque", "surreal", "abstract",
+    "oil painting", "watercolor",
+
+    # rendering engines
+    "octane render", "unreal engine",
+    "ray tracing", "global illumination",
+
+    # emotional narrative framing
+    "emotional portrait", "story scene",
+    "hero shot", "dramatic pose",
+]
+
+STRUCTURED_KEYWORDS = [
+    "return as json",
+    "output json",
+    "json schema",
+    "format as json",
+    "structured output",
+    "extract entities",
+    "extract fields",
+    "parse this",
+    "convert to table",
+    "create a table",
+    "categorize into",
+    "classify",
+    "label the following",
+    "taxonomy",
+    "generate schema",
+]
+
+MATH_PATTERNS = [
+    r"\b∫\b", r"\b∑\b", r"\b∂\b",
+    r"\bmatrix\b",
+    r"\blimit\b",
+    r"\bintegral\b",
+    r"\bderivative\b",
+    r"\bdifferential equation\b",
+    r"\blinear algebra\b",
+    r"\boptimi[sz]e\b",
+    r"\bgradient\b",
+    r"\bbackprop\b",
+    r"\bproof\b",
+    r"\btheorem\b",
+]
+
+LIGHTWEIGHT_KEYWORDS = [
+    "hello", "hi", "hey",
+    "thanks", "thank you",
+    "define", "definition of",
+    "what is", "who is",
+    "quick question",
+    "short answer",
+    "brief explanation",
+    "summarize",
+    "paraphrase",
+    "rewrite this",
+]
+
+def is_long_context(messages: list) -> bool:
+    total_chars = sum(len(m.get("content", "")) for m in messages)
+    return total_chars > 4000
+
+
+def contains_code(prompt: str) -> bool:
+    if "```" in prompt:
+        return True
+    for kw in CODE_KEYWORDS:
+        if kw in prompt:
+            return True
+    return False
+
+
+def is_math_heavy(prompt: str) -> bool:
+    for pattern in MATH_PATTERNS:
+        if re.search(pattern, prompt):
+            return True
+    return False
+
+
+def is_structured_task(prompt: str) -> bool:
+    for kw in STRUCTURED_KEYWORDS:
+        if kw in prompt:
+            return True
+    return False
+
+
+def multiple_questions(prompt: str) -> bool:
+    return prompt.count("?") >= 3
+
+def extract_user_text(messages: list) -> str:
+    return " ".join(
+        m.get("content", "")
+        for m in messages
+        if m.get("role") == "user"
+    ).lower()
+
+
+def is_complex_reasoning(prompt: str) -> bool:
+    if len(prompt) > 800:
+        return True
+
+    for kw in REASONING_KEYWORDS:
+        if kw in prompt:
+            return True
+
+    if re.search(r"\b(if|therefore|assume|let x|given that)\b", prompt):
+        return True
+
+    return False
+
+
+def is_lightweight(prompt: str) -> bool:
+    if len(prompt) < 100:
+        for kw in LIGHTWEIGHT_KEYWORDS:
+            if kw in prompt:
+                return True
+    return False
+
+
+def is_cinematic_image_prompt(prompt: str) -> bool:
+    for kw in CREATIVE_KEYWORDS:
+        if kw in prompt.lower():
+            return True
+    return False
 
 def check_rate_limit(ip: str):
     now = time.time()
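
Note: the classifiers above use plain substring and regex matching, which is cheap but blunt. Two behaviors worth knowing: short keywords match inside longer words ("hi" matches "which", which is_lightweight would count), and in MATH_PATTERNS the \b anchors only fire next to word characters, so a standalone ∫ surrounded by spaces never matches r"\b∫\b". A minimal sanity check, using a trimmed copy of the patterns (not part of app.py):

import re

MATH_PATTERNS = [r"\b∫\b", r"\bintegral\b", r"\bderivative\b"]  # trimmed copy

def is_math_heavy(prompt: str) -> bool:
    # same logic as the helper added above
    return any(re.search(pattern, prompt) for pattern in MATH_PATTERNS)

print(is_math_heavy("compute the integral of x^2"))  # True
print(is_math_heavy("evaluate ∫ f(x) dx"))           # False: no word char adjacent to ∫
print(is_math_heavy("what is 2 + 2"))                # False
print("hi" in "which model is best")                 # True: substring false positive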

@@ -99,40 +263,19 @@ def check_chat_rate_limit(ip: str):
     entry["count"] += 1
     return entry["count"]
 
-def detect_tool_use(messages: list) -> bool:
-    """
-    Detect if the request uses tools.
-    We check for:
-    - presence of "tool_calls"
-    - messages containing function_call-like structures
-    """
-    for m in messages:
-        if "tool_calls" in m:
-            return True
-        if "function_call" in m:
-            return True
-    return False
-
-
-def choose_model(messages: list, msg_count: int):
-    uses_tools = detect_tool_use(messages)
-
-    if uses_tools:
-        if msg_count > 20:
-            return "openai/gpt-oss-120b", "groq"
-        return "openai/gpt-oss-20b", "groq"
-
-    if msg_count > 20:
-        return "gpt-oss-120b", "cerebras"
-
-    return "llama-3.1-8b-instant", "groq"
-
 @app.get("/genimg/{prompt}")
 async def generate_image(prompt: str, request: Request):
     client_ip = request.client.host
     check_rate_limit(client_ip)
 
-
+    if is_cinematic_image_prompt(prompt):
+        chosen_model = "flux"
+    else:
+        chosen_model = "zimage"
+
+    print(f"[IMAGE GEN] Routing to model: {chosen_model}")
+
+    url = f"https://gen.pollinations.ai/image/{prompt}?model={chosen_model}&key={PKEY}"
 
     async with httpx.AsyncClient() as client:
         response = await client.get(url)
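
Note: the /genimg route now picks the image model from the prompt text: anything that hits a CREATIVE_KEYWORDS entry goes to flux, everything else to zimage. A small illustration with a trimmed keyword list (the sample prompts are hypothetical):

CREATIVE_KEYWORDS = ["cinematic", "volumetric lighting", "concept art"]  # trimmed copy

def is_cinematic_image_prompt(prompt: str) -> bool:
    return any(kw in prompt.lower() for kw in CREATIVE_KEYWORDS)

for prompt in ("Cinematic castle at dusk, volumetric lighting", "flowchart of a login form"):
    print(prompt, "->", "flux" if is_cinematic_image_prompt(prompt) else "zimage")
# Cinematic castle at dusk, volumetric lighting -> flux
# flowchart of a login form -> zimage

One caveat: the prompt is interpolated into the upstream URL as-is, so a prompt containing characters like ?, #, or & will change how the query string parses; running it through urllib.parse.quote first would be safer.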

@@ -189,30 +332,84 @@ async def generate_text(request: Request):
 
     ip = request.client.host
     msg_count = check_chat_rate_limit(ip)
-
+    prompt_text = extract_user_text(messages)
+
     uses_tools = (
         "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
     ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
-
-
-
+
+    long_context = is_long_context(messages)
+    code_present = contains_code(prompt_text)
+    math_heavy = is_math_heavy(prompt_text)
+    structured_task = is_structured_task(prompt_text)
+    multi_q = multiple_questions(prompt_text)
+
+    score = 0
+
+    if long_context:
+        score += 3
+
+    if math_heavy:
+        score += 3
+
+    if structured_task:
+        score += 2
+
+    if code_present:
+        score += 2
+
+    if multi_q:
+        score += 1
+
+    for kw in REASONING_KEYWORDS:
+        if kw in prompt_text:
+            score += 1
+
+    chosen_model = "llama-3.1-8b-instant"
+    provider = "groq"
+
     if uses_tools:
-
+        # tools always need reliability
+        if score >= 4:
             chosen_model = "openai/gpt-oss-120b"
         else:
             chosen_model = "openai/gpt-oss-20b"
         provider = "groq"
-
-
-
-        chosen_model = "gpt-oss-120b"
+    elif code_present:
+        if score >= 6:
+            chosen_model = "zai-glm-4.7"
             provider = "cerebras"
-
-
-
-
+    elif score >= 6:
+        # extreme reasoning
+        chosen_model = "gpt-oss-120b"
+        provider = "cerebras"
+
+    elif score >= 4:
+        # medium-high reasoning
+        chosen_model = "llama-3.3-70b-versatile"
+        provider = "groq"
+
+    elif score >= 3 and structured_task:
+        chosen_model = "qwen-3-235b-a22b-instruct-2507"
+        provider = "cerebras"
+
+    # else → stay instant
+
     body["model"] = chosen_model
-
+
+    print(f"""
+[ADVANCED ROUTER]
+Score: {score}
+Uses tools: {uses_tools}
+Long context: {long_context}
+Code present: {code_present}
+Math heavy: {math_heavy}
+Structured: {structured_task}
+Multi-question: {multi_q}
+→ Selected: {chosen_model} ({provider})
+""")
+
+
     stream = body.get("stream", False)
 
     if provider == "groq":
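
Note: to make the thresholds concrete, here is a standalone trace of the new scoring router (mirroring the hunk above, with REASONING_KEYWORDS trimmed to three entries; the sample prompt is hypothetical):

REASONING_KEYWORDS = ["prove", "step by step", "analyze"]  # trimmed copy

def route(prompt_text: str, uses_tools: bool, long_context: bool,
          code_present: bool, math_heavy: bool, structured_task: bool,
          multi_q: bool):
    # score the request, as in the handler above
    score = 0
    if long_context:
        score += 3
    if math_heavy:
        score += 3
    if structured_task:
        score += 2
    if code_present:
        score += 2
    if multi_q:
        score += 1
    for kw in REASONING_KEYWORDS:
        if kw in prompt_text:
            score += 1

    chosen_model, provider = "llama-3.1-8b-instant", "groq"  # default: stay instant
    if uses_tools:
        # tools always route to groq for reliability
        chosen_model = "openai/gpt-oss-120b" if score >= 4 else "openai/gpt-oss-20b"
        provider = "groq"
    elif code_present:
        if score >= 6:
            chosen_model, provider = "zai-glm-4.7", "cerebras"
        # a code prompt scoring below 6 keeps the instant default
    elif score >= 6:
        chosen_model, provider = "gpt-oss-120b", "cerebras"
    elif score >= 4:
        chosen_model, provider = "llama-3.3-70b-versatile", "groq"
    elif score >= 3 and structured_task:
        chosen_model, provider = "qwen-3-235b-a22b-instruct-2507", "cerebras"
    return score, chosen_model, provider

print(route("prove this step by step and analyze the result",
            uses_tools=False, long_context=False, code_present=False,
            math_heavy=True, structured_task=False, multi_q=False))
# (6, 'gpt-oss-120b', 'cerebras')

Because code_present is checked before the pure score thresholds, a code-flavored prompt scoring 4 or 5 stays on the instant model rather than escalating to llama-3.3-70b-versatile.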