Spaces:

sharktide
/

lightning

Running

App Files Files Community

sharktide committed on Apr 18

Commit

9162e29

verified ·

1 Parent(s): a90dfbb

Add prompt analyze endpoint

Browse files

Files changed (1) hide show

gen.py +87 -0

gen.py CHANGED Viewed

@@ -835,3 +835,90 @@ async def generate_text(
         return JSONResponse(status_code=r.status_code, content=payload)
     raise HTTPException(500, "Unknown provider routing error")

         return JSONResponse(status_code=r.status_code, content=payload)
     raise HTTPException(500, "Unknown provider routing error")
+@router.post("/prompt_analyze")
+async def analyze_prompt(
+    request: Request
+):
+    body = await request.json()
+    messages = body.get("prompt", [])
+    if not isinstance(messages, list) or len(messages) == 0:
+        raise HTTPException(400, "messages[] is required")
+    total_chars, total_bytes = calculate_messages_size(messages)
+    prompt_text = extract_user_text(messages)
+    uses_tools = (
+        "tools" in body and isinstance(body["tools"], list) and len(body["tools"]) > 0
+    ) or ("tool_choice" in body and body["tool_choice"] not in [None, "none"])
+    long_context = is_long_context(messages)
+    code_present = contains_code(prompt_text)
+    math_heavy = is_math_heavy(prompt_text)
+    structured_task = is_structured_task(prompt_text)
+    multi_q = multiple_questions(prompt_text)
+    code_heavy = is_code_heavy(prompt_text, code_present, long_context)
+    score = 0
+    if long_context:
+        score += 3
+    if math_heavy:
+        score += 3
+    if structured_task:
+        score += 2
+    if code_present:
+        score += 2
+    if multi_q:
+        score += 1
+    for kw in REASONING_KEYWORDS:
+        if kw in prompt_text:
+            score += 1
+    chosen_model = "llama-3.1-8b-instant"
+    provider = "groq"
+    has_images = contains_images(messages)
+    if has_images:
+        chosen_model = "gpt-4o-mini"
+        provider = "navy vision"
+    else:
+        if score > 10:
+            score = 10
+        if uses_tools:
+            if score >= 6:
+                chosen_model = "nemotron-3-super"
+                provider = "navy"
+            elif score >= 4:
+                chosen_model = "openai/gpt-oss-120b"
+                provider = "groq"
+            else:
+                chosen_model = "openai/gpt-oss-20b"
+                provider = "groq"
+        elif code_present:
+            if code_heavy and score >= 6:
+                chosen_model = "qwen-3-235b-a22b-instruct-2507"
+                provider = "cerebras"
+            elif score >= 4:
+                chosen_model = "llama-3.3-70b-versatile"
+                provider = "groq"
+        elif score >= 4:
+            chosen_model = "meta-llama/llama-4-scout-17b-16e-instruct"
+            provider = "groq"
+        if provider == "groq" and (
+            total_chars > MAX_GROQ_PROMPT_CHARS or total_bytes > MAX_GROQ_PROMPT_BYTES
+        ):
+            provider = "cerebras"
+            chosen_model = "qwen-3-235b-a22b-instruct-2507"
+    return { chosen_model, provider }