| """ |
| title: Coding Fallback Pipe |
| author: nerdur |
| author_url: https://nerdur-webui.hf.space |
| version: 4.2 |
| description: | |
| Fallback redoslijed za kodiranje: |
| 1. NVIDIA NIM (Qwen3-Coder, GLM-4.7) |
| 2. Gemini 2.5 Flash |
| 3. Groq (Kimi K2, DeepSeek R1, Llama 3.3) |
| 4. Cerebras |
| 5. SambaNova |
| |
| Korisnik može napisati "next" ili "sljedeći" da dobije |
| odgovor od sljedećeg modela u listi. |
| """ |
|
|
| from pydantic import BaseModel |
| from typing import Optional, Union, Iterator |
| import requests |
| import os |
| import json |
|
|
# Per-user cursor into the provider list: maps a user id to the index of the
# provider/model that produced that user's most recent answer.  Module-level,
# so it is shared by every Pipe instance in this process.
_last_model_index: dict = {}

# Completion budget used when a model has no entry in MODEL_MAX_TOKENS and
# when the request body does not carry its own ``max_tokens``.
DEFAULT_MAX_TOKENS = 16_384

# Upper bound applied to ``max_tokens`` for each known model id; the requested
# value is clamped to this ceiling before the request is sent.
MODEL_MAX_TOKENS = dict(
    [
        ("qwen/qwen3-coder-480b-a35b-instruct", 32_768),
        ("zhipuai/glm-4.7", 8_192),
        ("gemini-2.5-flash", 65_536),
        ("moonshotai/kimi-k2-instruct", 32_768),
        ("deepseek-r1-distill-llama-70b", 32_768),
        ("llama-3.3-70b-versatile", 32_768),
        ("llama-3.3-70b", 8_192),
        ("Meta-Llama-3.3-70B-Instruct", 16_384),
    ]
)
|
|
|
|
class Pipe:
    """Route a chat request through an ordered list of LLM providers.

    Providers are tried in a fixed order until one returns an answer.  The
    index of the provider that answered is remembered per user, so that a
    follow-up message consisting only of a switch command (e.g. ``next``)
    re-asks the previous question starting from the following provider.
    """

    class Valves(BaseModel):
        # API keys; an empty value falls back to the matching environment
        # variable (see ``_build_providers``).
        nvidia_api_key: str = ""
        google_api_key: str = ""
        groq_api_key: str = ""
        cerebras_api_key: str = ""
        sambanova_api_key: str = ""
        # Master switch: when False, ``pipe`` answers with a fixed notice.
        enabled: bool = True
        # Timeout passed to ``requests.post`` for every upstream call.
        request_timeout: int = 120

    # HTTP statuses treated as "this model is unavailable, try the next one".
    _RETRYABLE_STATUSES = frozenset({429, 500, 502, 503, 529})

    # A user message counts as a switch command only when the WHOLE message
    # (ignoring case and surrounding punctuation) is one of these phrases.
    _NEXT_TRIGGERS = frozenset({
        "next", "sljedeći", "sledeci", "sljedeci", "drugi model",
        "promjeni model", "probaj drugi", "skip", "next model",
    })

    # Characters ignored around a switch command ("next!", "**next**", ...).
    _COMMAND_PADDING = " \t\r\n.,;:!?*`\"'"

    def __init__(self):
        self.type = "pipe"
        self.id = "coding_fallback"
        self.name = "🔀 Smart Coding Router (Multi-Provider)"
        self.valves = self.Valves()

    def pipes(self):
        """Return the single model entry exposed by this pipe."""
        return [{"id": self.id, "name": self.name}]

    def _build_providers(self):
        """Return ``(provider_name, base_url, model_id, api_key)`` tuples.

        Only providers with a non-empty key (valve first, then environment)
        are included; the list order is the fallback order.
        """
        valves = self.valves
        catalogue = [
            (
                "NVIDIA",
                "https://integrate.api.nvidia.com/v1",
                valves.nvidia_api_key
                or os.getenv("NVIDIA_ID_API_KEY")
                or os.getenv("NVIDIA_API_KEY", ""),
                ["qwen/qwen3-coder-480b-a35b-instruct", "zhipuai/glm-4.7"],
            ),
            (
                "Gemini",
                "https://generativelanguage.googleapis.com/v1beta/openai",
                valves.google_api_key or os.getenv("GOOGLE_API_KEY", ""),
                ["gemini-2.5-flash"],
            ),
            (
                "Groq",
                "https://api.groq.com/openai/v1",
                valves.groq_api_key or os.getenv("GROQ_API_KEY", ""),
                [
                    "moonshotai/kimi-k2-instruct",
                    "deepseek-r1-distill-llama-70b",
                    "llama-3.3-70b-versatile",
                ],
            ),
            (
                "Cerebras",
                "https://api.cerebras.ai/v1",
                valves.cerebras_api_key or os.getenv("CEREBRAS_API_KEY", ""),
                ["llama-3.3-70b"],
            ),
            (
                "SambaNova",
                "https://api.sambanova.ai/v1",
                valves.sambanova_api_key or os.getenv("SAMBANOVA_API_KEY", ""),
                ["Meta-Llama-3.3-70B-Instruct"],
            ),
        ]

        providers = []
        for name, base_url, key, models in catalogue:
            if not key:
                continue
            for model in models:
                providers.append((name, base_url, model, key))
        return providers

    @staticmethod
    def _message_text(message) -> str:
        """Return the plain text of a chat message.

        ``content`` may be a string, ``None``, or a list of content parts;
        for a list, the ``text`` of every ``{"type": "text"}`` part is joined.
        Anything else yields an empty string.
        """
        content = message.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return " ".join(
                str(part.get("text") or "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        return ""

    @classmethod
    def _is_switch_command(cls, text: str) -> bool:
        """Tell whether *text* is nothing but a "next model" command.

        Matching the whole message (rather than a substring) keeps ordinary
        prompts such as "how does Next.js routing work?" or "skip the tests"
        from being mistaken for a command.
        """
        collapsed = " ".join(text.split())
        return collapsed.strip(cls._COMMAND_PADDING).lower() in cls._NEXT_TRIGGERS

    def _strip_switch_turns(self, messages):
        """Return *messages* without switch commands and the answers they reject.

        Each user message that is a switch command is dropped together with
        the assistant message directly before it, so the conversation ends on
        the question that should be answered again.  All other history is kept.
        """
        kept = []
        for message in messages:
            is_command = (
                message.get("role") == "user"
                and self._is_switch_command(self._message_text(message))
            )
            if not is_command:
                kept.append(message)
                continue
            if kept and kept[-1].get("role") == "assistant":
                kept.pop()
        return kept

    @staticmethod
    def _stream_with_footer(chunks, footer):
        """Yield every upstream chunk, then the footer chunk and ``[DONE]``."""
        yield from chunks
        footer_chunk = json.dumps({
            "choices": [{
                "delta": {"content": footer},
                "finish_reason": None
            }]
        })
        yield f"data: {footer_chunk}\n\n"
        yield "data: [DONE]\n\n"

    def _call_model(self, provider_name, base_url, model_id, api_key, messages, stream, max_tokens, temperature):
        """POST one chat-completion request to ``{base_url}/chat/completions``.

        Returns a ``(result, error)`` pair:

        * ``(text, None)`` for a successful non-streaming call,
        * ``(generator, None)`` for a successful streaming call; the generator
          yields the upstream lines (minus ``data: [DONE]``) and closes the
          HTTP response when it finishes or is discarded,
        * ``(None, label)`` when the model should be skipped (retryable HTTP
          status, 401, or a completion whose content is null).

        Other HTTP errors raise ``requests.exceptions.HTTPError``; network
        failures raise whatever ``requests.post`` raises.
        """
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        # Never ask a model for more tokens than its known ceiling.
        model_limit = MODEL_MAX_TOKENS.get(model_id, DEFAULT_MAX_TOKENS)
        actual_max_tokens = min(max_tokens, model_limit)

        payload = {
            "model": model_id,
            "messages": messages,
            "stream": stream,
            "max_tokens": actual_max_tokens,
            "temperature": temperature,
        }

        print(f"🔀 Coding Router [{provider_name}]: {model_id} (max_tokens={actual_max_tokens})...")
        response = requests.post(
            f"{base_url}/chat/completions",
            headers=headers,
            json=payload,
            stream=stream,
            timeout=self.valves.request_timeout,
        )

        # With stream=True the connection stays open until the body is read or
        # the response is closed, so every early exit closes it explicitly.
        if response.status_code in self._RETRYABLE_STATUSES:
            response.close()
            print(f"⚠️ [{provider_name}] {model_id} → HTTP {response.status_code}, prelazim...")
            return None, f"HTTP {response.status_code}"

        if response.status_code == 401:
            response.close()
            print(f"⚠️ [{provider_name}] Neispravan API ključ")
            return None, "401 Unauthorized"

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            response.close()
            raise

        if stream:
            def generate(resp=response, pname=provider_name, mid=model_id):
                print(f"✅ [{pname}] Streaming: {mid}")
                try:
                    for line in resp.iter_lines():
                        if not line:
                            continue
                        decoded = line.decode("utf-8")
                        # The terminator is emitted once, after the footer.
                        if decoded.strip() == "data: [DONE]":
                            continue
                        yield decoded + "\n"
                finally:
                    resp.close()
            return generate(), None

        data = response.json()
        content = data["choices"][0]["message"].get("content")
        if content is None:
            # Nothing to show the user; let the caller try the next model.
            return None, "empty response"
        print(f"✅ [{provider_name}] Odgovor od: {model_id}")
        return content, None

    def pipe(self, body: dict, __user__: Optional[dict] = None) -> Union[str, Iterator]:
        """Answer a chat request using the first provider that succeeds.

        Reads ``messages``, ``stream``, ``max_tokens`` and ``temperature``
        from *body* and the user id from ``__user__``.  Returns the answer
        text (non-streaming), a generator of stream lines (streaming), or a
        user-facing error string when the pipe is disabled, no key is
        configured, no further model exists, or every provider failed.
        """
        if not self.valves.enabled:
            return "Router je isključen."

        providers = self._build_providers()
        if not providers:
            return "❌ Nijedan API ključ nije postavljen. Provjeri Space Secrets."

        messages = body.get("messages") or []
        stream = body.get("stream", False)
        # An explicit null must not reach min() / the payload as None.
        max_tokens = body.get("max_tokens") or DEFAULT_MAX_TOKENS
        temperature = body.get("temperature")
        if temperature is None:
            temperature = 0.2

        user_id = (__user__ or {}).get("id", "default")

        last_user_text = next(
            (self._message_text(m) for m in reversed(messages) if m.get("role") == "user"),
            "",
        )

        start_index = 0
        if self._is_switch_command(last_user_text) and user_id in _last_model_index:
            start_index = _last_model_index[user_id] + 1
            if start_index >= len(providers):
                return "❌ Nema više modela u listi. Počinjem od početka!\n\nNapiši svoju poruku ponovo."
            messages = self._strip_switch_turns(messages)
            print(f"🔀 Korisnik traži sljedeći model, počinjem od indeksa {start_index}")

        last_error = None
        # Providers whose key was rejected; their remaining models are skipped.
        # (Reassigning the index of a ``for ... in range`` loop has no effect.)
        rejected_providers = set()

        for index in range(start_index, len(providers)):
            provider_name, base_url, model_id, api_key = providers[index]
            if provider_name in rejected_providers:
                continue

            try:
                result, error = self._call_model(
                    provider_name, base_url, model_id, api_key,
                    messages, stream, max_tokens, temperature
                )
            except requests.exceptions.Timeout:
                print(f"⚠️ [{provider_name}] {model_id} timeout ({self.valves.request_timeout}s), prelazim...")
                last_error = "timeout"
                continue
            except Exception as e:
                # Fallback boundary: any failure of one model moves on to the next.
                print(f"⚠️ [{provider_name}] {model_id} greška: {e}")
                last_error = str(e)
                continue

            if result is None:
                last_error = error
                if error == "401 Unauthorized":
                    rejected_providers.add(provider_name)
                continue

            _last_model_index[user_id] = index
            footer = f"\n\n`{provider_name} · {model_id}` — napiši **next** za drugi model"
            if stream:
                return self._stream_with_footer(result, footer)
            return result + footer

        return f"❌ Svi modeli neuspješni. Posljednja greška: {last_error}"
|
|