from services.longcat_client import generate as longcat_generate
from services.model_loader import generate_text as qwen_generate

# Log message for each LongCat error code the router distinguishes; any other
# code is reported through the generic "LongCat error (...)" message.
_LONGCAT_FALLBACK_REASONS = {
    "rate_limit": "LongCat quota exhausted — falling back to Qwen",
    "auth": "LongCat auth failed — falling back to Qwen",
    "cooldown": "LongCat in cooldown — falling back to Qwen",
}


def _log(msg: str) -> None:
    """Print *msg* to stdout with the ``[Router]`` prefix."""
    print(f"[Router] {msg}")


def generate_text(messages, temperature=0.3, max_new_tokens=2000):
    """Generate text with LongCat, falling back to Qwen when LongCat fails.

    LongCat is tried first. If it returns a truthy text, that text is
    returned. Otherwise the reason is logged (taken from the ``"error"`` key
    of the metadata LongCat returned) and the same request is sent to Qwen.

    Args:
        messages: Prompt payload forwarded unchanged to both backends.
            NOTE(review): presumably a chat-style message list — confirm
            against the backend clients.
        temperature: Sampling temperature forwarded to both backends.
        max_new_tokens: Generation length limit forwarded to both backends.

    Returns:
        The generated text from whichever backend succeeded, or ``None`` when
        both backends produced nothing or failed.
    """
    try:
        text, meta = longcat_generate(messages, temperature, max_new_tokens)
    except Exception as exc:
        # A crash in the primary backend must not bypass the fallback: treat
        # it like any other LongCat failure and carry the cause in the log.
        text, meta = None, {"error": f"{type(exc).__name__}: {exc}"}

    if text:
        _log(
            f"LongCat served — model={meta.get('model','?')} "
            f"tokens={meta.get('tokens','?')}"
        )
        return text

    err = meta.get("error", "unknown")
    # Only string codes are looked up, so an unexpected (possibly unhashable)
    # error value still reaches the generic message instead of raising.
    reason = _LONGCAT_FALLBACK_REASONS.get(err) if isinstance(err, str) else None
    _log(reason or f"LongCat error ({err}) — falling back to Qwen")

    try:
        text = qwen_generate(messages, temperature, max_new_tokens)
    except Exception as exc:
        # Last backend in the chain: report the failure and signal "no result".
        _log(f"Qwen generation failed: {exc}")
        return None

    if text:
        _log("Qwen served (fallback)")
        return text

    _log("Qwen returned None — model may not be loaded")
    return None