webui / coding_priority_pipe.py
Nerdur's picture
Upload coding_priority_pipe.py
6a7a454 verified
Raw
History Blame Contribute Delete
8.69 kB
"""
title: Coding Fallback Pipe
author: nerdur
author_url: https://nerdur-webui.hf.space
version: 4.2
description: |
Fallback redoslijed za kodiranje:
1. NVIDIA NIM (Qwen3-Coder, GLM-4.7)
2. Gemini 2.5 Flash
3. Groq (Kimi K2, DeepSeek R1, Llama 3.3)
4. Cerebras
5. SambaNova
Korisnik može napisati "next" ili "sljedeći" da dobije
odgovor od sljedećeg modela u listi.
"""
from pydantic import BaseModel
from typing import Optional, Union, Iterator
import requests
import os
import json
# Position (in the provider list) of the model that answered last, per user id.
_last_model_index: dict = {}

# Upper bound on max_tokens per model id -- every model has a different limit.
MODEL_MAX_TOKENS = {
    "qwen/qwen3-coder-480b-a35b-instruct": 32 * 1024,
    "zhipuai/glm-4.7": 8 * 1024,
    "gemini-2.5-flash": 64 * 1024,
    "moonshotai/kimi-k2-instruct": 32 * 1024,
    "deepseek-r1-distill-llama-70b": 32 * 1024,
    "llama-3.3-70b-versatile": 32 * 1024,
    "llama-3.3-70b": 8 * 1024,
    "Meta-Llama-3.3-70B-Instruct": 16 * 1024,
}

# Used both as the default request size and as the bound for unlisted models.
DEFAULT_MAX_TOKENS = 16 * 1024
class Pipe:
    """Route a chat request to the first provider/model that answers.

    Providers are tried in a fixed order (NVIDIA, Gemini, Groq, Cerebras,
    SambaNova); only those with an API key configured are used.  When the
    user's latest message consists solely of a "next" command, the previous
    question is re-asked starting with the model after the one that answered
    last for that user.
    """

    # Messages that, when sent on their own, request the next model.
    _NEXT_TRIGGERS = (
        "next", "sljedeći", "sledeci", "sljedeci", "drugi model",
        "promjeni model", "probaj drugi", "skip", "next model",
    )
    # Whitespace, punctuation and markdown markers ignored around a command.
    _TRIM_CHARS = " \t\r\n.,!?;:\"'`*"

    class Valves(BaseModel):
        # API keys; an empty value falls back to the environment variable
        # looked up in _build_providers.
        nvidia_api_key: str = ""
        google_api_key: str = ""
        groq_api_key: str = ""
        cerebras_api_key: str = ""
        sambanova_api_key: str = ""
        enabled: bool = True
        request_timeout: int = 120  # seconds; raised from 45 to 120

    def __init__(self):
        self.type = "pipe"
        self.id = "coding_fallback"
        self.name = "🔀 Smart Coding Router (Multi-Provider)"
        self.valves = self.Valves()

    def pipes(self):
        """Return the single pipe entry exposed by this class."""
        return [{"id": self.id, "name": self.name}]

    def _build_providers(self):
        """Return ``(provider, base_url, model_id, api_key)`` tuples in fallback order.

        A provider contributes its models only when a key is available, either
        from the valves or from the matching environment variable.
        """
        valves = self.valves
        catalog = [
            (
                "NVIDIA",
                "https://integrate.api.nvidia.com/v1",
                valves.nvidia_api_key
                or os.getenv("NVIDIA_ID_API_KEY")
                or os.getenv("NVIDIA_API_KEY", ""),
                ["qwen/qwen3-coder-480b-a35b-instruct", "zhipuai/glm-4.7"],
            ),
            (
                "Gemini",
                "https://generativelanguage.googleapis.com/v1beta/openai",
                valves.google_api_key or os.getenv("GOOGLE_API_KEY", ""),
                ["gemini-2.5-flash"],
            ),
            (
                "Groq",
                "https://api.groq.com/openai/v1",
                valves.groq_api_key or os.getenv("GROQ_API_KEY", ""),
                [
                    "moonshotai/kimi-k2-instruct",
                    "deepseek-r1-distill-llama-70b",
                    "llama-3.3-70b-versatile",
                ],
            ),
            (
                "Cerebras",
                "https://api.cerebras.ai/v1",
                valves.cerebras_api_key or os.getenv("CEREBRAS_API_KEY", ""),
                ["llama-3.3-70b"],
            ),
            (
                "SambaNova",
                "https://api.sambanova.ai/v1",
                valves.sambanova_api_key or os.getenv("SAMBANOVA_API_KEY", ""),
                ["Meta-Llama-3.3-70B-Instruct"],
            ),
        ]
        return [
            (name, base_url, model, key)
            for name, base_url, key, models in catalog
            if key
            for model in models
        ]

    @staticmethod
    def _message_text(message):
        """Return the plain text of a chat message, or "" when it has none.

        Handles string content, missing/None content, and list content
        (assumed to be OpenAI-style parts such as ``{"type": "text", ...}``).
        """
        content = message.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return " ".join(
                str(part.get("text") or "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        return ""

    @classmethod
    def _is_next_command(cls, message):
        """Tell whether a user message consists solely of a "next" trigger.

        The whole message must equal a trigger (ignoring case, surrounding
        punctuation and repeated spaces); a mere substring match would fire on
        ordinary questions that mention e.g. "next" or "skip".
        """
        if message.get("role") != "user":
            return False
        text = " ".join(cls._message_text(message).lower().split())
        return text.strip(cls._TRIM_CHARS) in cls._NEXT_TRIGGERS

    @classmethod
    def _retry_context(cls, history):
        """Return ``history`` without trailing answers and "next" commands.

        What remains ends with the question that should be asked again.
        """
        kept = list(history)
        while kept and (
            kept[-1].get("role") == "assistant" or cls._is_next_command(kept[-1])
        ):
            kept.pop()
        return kept

    def _call_model(self, provider_name, base_url, model_id, api_key, messages, stream, max_tokens, temperature):
        """Send one chat-completions request to ``base_url``.

        Returns:
            ``(result, error)``.  On success ``error`` is None and ``result``
            is the answer text, or a generator of "data: ..." lines when
            ``stream`` is true.  On a retryable failure (HTTP 429/5xx, 401,
            or a reply without text) ``result`` is None and ``error``
            describes the problem.

        Raises:
            requests.exceptions.RequestException: on timeouts, connection
                errors and any other non-success HTTP status.
        """
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        # Use the model-specific limit, but never more than the caller asked for.
        model_limit = MODEL_MAX_TOKENS.get(model_id, DEFAULT_MAX_TOKENS)
        actual_max_tokens = min(max_tokens, model_limit)
        payload = {
            "model": model_id,
            "messages": messages,
            "stream": stream,
            "max_tokens": actual_max_tokens,
            "temperature": temperature,
        }
        print(f"🔀 Coding Router [{provider_name}]: {model_id} (max_tokens={actual_max_tokens})...")
        response = requests.post(
            f"{base_url}/chat/completions",
            headers=headers,
            json=payload,
            stream=stream,
            timeout=self.valves.request_timeout,
        )

        # Every failure path closes the response: with stream=True the
        # connection is otherwise held until the body is consumed.
        if response.status_code in (429, 500, 502, 503, 529):
            response.close()
            print(f"⚠️ [{provider_name}] {model_id} → HTTP {response.status_code}, prelazim...")
            return None, f"HTTP {response.status_code}"
        if response.status_code == 401:
            response.close()
            print(f"⚠️ [{provider_name}] Neispravan API ključ")
            return None, "401 Unauthorized"
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            response.close()
            raise

        if stream:
            def generate(resp=response, pname=provider_name, mid=model_id):
                print(f"✅ [{pname}] Streaming: {mid}")
                try:
                    for line in resp.iter_lines():
                        if not line:
                            continue
                        decoded = line.decode("utf-8")
                        # The caller emits its own terminator after the footer.
                        if decoded.strip() == "data: [DONE]":
                            continue
                        yield decoded + "\n"
                finally:
                    resp.close()
            return generate(), None

        data = response.json()
        content = data["choices"][0]["message"]["content"]
        if not isinstance(content, str):
            # A null/non-text reply is a failure with a reason, so the caller
            # can fall through to the next model and report something useful.
            return None, "no text content in response"
        print(f"✅ [{provider_name}] Odgovor od: {model_id}")
        return content, None

    def pipe(self, body: dict, __user__: Optional[dict] = None) -> Union[str, Iterator]:
        """Answer ``body`` with the first model that succeeds.

        Args:
            body: Request dict; reads "messages", "stream", "max_tokens" and
                "temperature".
            __user__: Optional user dict; its "id" keys the per-user record of
                which model answered last.

        Returns:
            The answer text with a footer naming the model, or -- when
            streaming -- an iterator of "data: ..." lines ending with the
            footer chunk and "data: [DONE]".  A plain message string is
            returned when the router is disabled, no key is configured, the
            model list is exhausted, or every model failed.
        """
        if not self.valves.enabled:
            return "Router je isključen."
        providers = self._build_providers()
        if not providers:
            return "❌ Nijedan API ključ nije postavljen. Provjeri Space Secrets."

        messages = body.get("messages") or []
        stream = body.get("stream", False)
        # The caller may override; an absent or null value uses the default.
        max_tokens = body.get("max_tokens") or DEFAULT_MAX_TOKENS
        temperature = body.get("temperature")
        if temperature is None:
            temperature = 0.2
        user_id = (__user__ or {}).get("id", "default")

        last_user_pos = next(
            (
                pos
                for pos in range(len(messages) - 1, -1, -1)
                if messages[pos].get("role") == "user"
            ),
            None,
        )
        want_next = last_user_pos is not None and self._is_next_command(
            messages[last_user_pos]
        )

        start_index = 0
        if want_next and user_id in _last_model_index:
            retry_messages = self._retry_context(messages[:last_user_pos])
            # Without an earlier question there is nothing to retry; the
            # message is then handled like any other, from the first model.
            if any(m.get("role") == "user" for m in retry_messages):
                start_index = _last_model_index[user_id] + 1
                if start_index >= len(providers):
                    return "❌ Nema više modela u listi. Počinjem od početka!\n\nNapiši svoju poruku ponovo."
                messages = retry_messages
                print(f"🔀 Korisnik traži sljedeći model, počinjem od indeksa {start_index}")

        last_error = None
        # Providers whose key was rejected; their remaining models are skipped.
        # (Reassigning the loop variable would not skip anything.)
        rejected_providers = set()
        for i in range(start_index, len(providers)):
            provider_name, base_url, model_id, api_key = providers[i]
            if provider_name in rejected_providers:
                continue
            try:
                result, error = self._call_model(
                    provider_name, base_url, model_id, api_key,
                    messages, stream, max_tokens, temperature
                )
            except requests.exceptions.Timeout:
                print(f"⚠️ [{provider_name}] {model_id} timeout ({self.valves.request_timeout}s), prelazim...")
                last_error = "timeout"
                continue
            except Exception as e:
                # Boundary catch: any failure of one model must not stop the
                # fallback chain; the error is logged and reported at the end.
                print(f"⚠️ [{provider_name}] {model_id} greška: {e}")
                last_error = str(e)
                continue

            if result is None:
                last_error = error
                if error == "401 Unauthorized":
                    rejected_providers.add(provider_name)
                continue

            _last_model_index[user_id] = i
            footer = f"\n\n`{provider_name} · {model_id}` — napiši **next** za drugi model"
            if not stream:
                return result + footer

            def stream_with_footer(gen=result, f=footer):
                yield from gen
                footer_chunk = json.dumps({
                    "choices": [{
                        "delta": {"content": f},
                        "finish_reason": None
                    }]
                })
                yield f"data: {footer_chunk}\n\n"
                yield "data: [DONE]\n\n"
            return stream_with_footer()

        return f"❌ Svi modeli neuspješni. Posljednja greška: {last_error}"