""" title: Coding Fallback Pipe author: nerdur author_url: https://nerdur-webui.hf.space version: 4.2 description: | Fallback redoslijed za kodiranje: 1. NVIDIA NIM (Qwen3-Coder, GLM-4.7) 2. Gemini 2.5 Flash 3. Groq (Kimi K2, DeepSeek R1, Llama 3.3) 4. Cerebras 5. SambaNova Korisnik može napisati "next" ili "sljedeći" da dobije odgovor od sljedećeg modela u listi. """ from pydantic import BaseModel from typing import Optional, Union, Iterator import requests import os import json _last_model_index: dict = {} # Max tokeni po modelu — svaki ima drugačiji limit MODEL_MAX_TOKENS = { "qwen/qwen3-coder-480b-a35b-instruct": 32768, "zhipuai/glm-4.7": 8192, "gemini-2.5-flash": 65536, "moonshotai/kimi-k2-instruct": 32768, "deepseek-r1-distill-llama-70b": 32768, "llama-3.3-70b-versatile": 32768, "llama-3.3-70b": 8192, "Meta-Llama-3.3-70B-Instruct": 16384, } DEFAULT_MAX_TOKENS = 16384 class Pipe: class Valves(BaseModel): nvidia_api_key: str = "" google_api_key: str = "" groq_api_key: str = "" cerebras_api_key: str = "" sambanova_api_key: str = "" enabled: bool = True request_timeout: int = 120 # Povećano sa 45 na 120s def __init__(self): self.type = "pipe" self.id = "coding_fallback" self.name = "🔀 Smart Coding Router (Multi-Provider)" self.valves = self.Valves() def pipes(self): return [{"id": self.id, "name": self.name}] def _build_providers(self): nvidia_key = self.valves.nvidia_api_key or os.getenv("NVIDIA_ID_API_KEY") or os.getenv("NVIDIA_API_KEY", "") google_key = self.valves.google_api_key or os.getenv("GOOGLE_API_KEY", "") groq_key = self.valves.groq_api_key or os.getenv("GROQ_API_KEY", "") cerebras_key = self.valves.cerebras_api_key or os.getenv("CEREBRAS_API_KEY", "") sambanova_key = self.valves.sambanova_api_key or os.getenv("SAMBANOVA_API_KEY", "") providers = [] if nvidia_key: for model in [ "qwen/qwen3-coder-480b-a35b-instruct", "zhipuai/glm-4.7", ]: providers.append(("NVIDIA", "https://integrate.api.nvidia.com/v1", model, nvidia_key)) if google_key: providers.append(("Gemini", "https://generativelanguage.googleapis.com/v1beta/openai", "gemini-2.5-flash", google_key)) if groq_key: for model in [ "moonshotai/kimi-k2-instruct", "deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", ]: providers.append(("Groq", "https://api.groq.com/openai/v1", model, groq_key)) if cerebras_key: providers.append(("Cerebras", "https://api.cerebras.ai/v1", "llama-3.3-70b", cerebras_key)) if sambanova_key: providers.append(("SambaNova", "https://api.sambanova.ai/v1", "Meta-Llama-3.3-70B-Instruct", sambanova_key)) return providers def _call_model(self, provider_name, base_url, model_id, api_key, messages, stream, max_tokens, temperature): headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } # Koristi model-specifični limit, ali ne više od onoga što korisnik traži model_limit = MODEL_MAX_TOKENS.get(model_id, DEFAULT_MAX_TOKENS) actual_max_tokens = min(max_tokens, model_limit) payload = { "model": model_id, "messages": messages, "stream": stream, "max_tokens": actual_max_tokens, "temperature": temperature, } print(f"🔀 Coding Router [{provider_name}]: {model_id} (max_tokens={actual_max_tokens})...") response = requests.post( f"{base_url}/chat/completions", headers=headers, json=payload, stream=stream, timeout=self.valves.request_timeout, ) if response.status_code in (429, 500, 502, 503, 529): print(f"⚠️ [{provider_name}] {model_id} → HTTP {response.status_code}, prelazim...") return None, f"HTTP {response.status_code}" if response.status_code == 401: print(f"⚠️ [{provider_name}] Neispravan API ključ") return None, "401 Unauthorized" response.raise_for_status() if stream: def generate(resp=response, pname=provider_name, mid=model_id): print(f"✅ [{pname}] Streaming: {mid}") for line in resp.iter_lines(): if line: decoded = line.decode("utf-8") if decoded.strip() == "data: [DONE]": continue yield decoded + "\n" return generate(), None else: data = response.json() content = data["choices"][0]["message"]["content"] print(f"✅ [{provider_name}] Odgovor od: {model_id}") return content, None def pipe(self, body: dict, __user__: Optional[dict] = None) -> Union[str, Iterator]: if not self.valves.enabled: return "Router je isključen." providers = self._build_providers() if not providers: return "❌ Nijedan API ključ nije postavljen. Provjeri Space Secrets." messages = body.get("messages", []) stream = body.get("stream", False) # Korisnik može overridati, ali default je visok max_tokens = body.get("max_tokens", DEFAULT_MAX_TOKENS) temperature = body.get("temperature", 0.2) user_id = (__user__ or {}).get("id", "default") last_user_msg = "" for msg in reversed(messages): if msg.get("role") == "user": last_user_msg = msg.get("content", "").strip().lower() break next_triggers = ["next", "sljedeći", "sledeci", "sljedeci", "drugi model", "promjeni model", "probaj drugi", "skip", "next model"] want_next = any(t in last_user_msg for t in next_triggers) if want_next and user_id in _last_model_index: start_index = _last_model_index[user_id] + 1 if start_index >= len(providers): return "❌ Nema više modela u listi. Počinjem od početka!\n\nNapiši svoju poruku ponovo." messages = [m for m in messages if not any( t in m.get("content", "").lower() for t in next_triggers )] print(f"🔀 Korisnik traži sljedeći model, počinjem od indeksa {start_index}") else: start_index = 0 last_error = None for i in range(start_index, len(providers)): provider_name, base_url, model_id, api_key = providers[i] try: result, error = self._call_model( provider_name, base_url, model_id, api_key, messages, stream, max_tokens, temperature ) if result is not None: _last_model_index[user_id] = i footer = f"\n\n`{provider_name} · {model_id}` — napiši **next** za drugi model" if stream: def stream_with_footer(gen=result, f=footer): yield from gen footer_chunk = json.dumps({ "choices": [{ "delta": {"content": f}, "finish_reason": None }] }) yield f"data: {footer_chunk}\n\n" yield "data: [DONE]\n\n" return stream_with_footer() else: return result + footer last_error = error if error == "401 Unauthorized": while i + 1 < len(providers) and providers[i + 1][0] == provider_name: i += 1 except requests.exceptions.Timeout: print(f"⚠️ [{provider_name}] {model_id} timeout ({self.valves.request_timeout}s), prelazim...") last_error = "timeout" continue except Exception as e: print(f"⚠️ [{provider_name}] {model_id} greška: {e}") last_error = str(e) continue return f"❌ Svi modeli neuspješni. Posljednja greška: {last_error}"