| from services.longcat_client import generate as longcat_generate | |
| from services.model_loader import generate_text as qwen_generate | |
| def _log(msg: str): | |
| print(f"[Router] {msg}") | |
def generate_text(messages, temperature=0.3, max_new_tokens=2000):
    """Generate text with LongCat, falling back to Qwen when LongCat yields nothing.

    The request is first handed to ``longcat_generate``. When that call
    returns truthy text, the text is returned as-is. Otherwise the failure
    reason is logged and the same arguments are passed to ``qwen_generate``.

    Args:
        messages: Passed through unchanged to both backends.
        temperature: Passed through unchanged to both backends.
        max_new_tokens: Passed through unchanged to both backends.

    Returns:
        The text from whichever backend produced a truthy result, or ``None``
        when neither backend did (including when a backend raised).
    """
    # NOTE(review): the "β" in the log messages below looks like a mis-decoded
    # dash or arrow; the strings are kept as-is until the intended character
    # is confirmed.
    try:
        text, meta = longcat_generate(messages, temperature, max_new_tokens)
    except Exception as e:
        # A crash in the primary backend (or a malformed return value that
        # cannot be unpacked) must not prevent the fallback from running.
        text, meta = None, {"error": f"exception: {e}"}

    # Everything below reads meta with .get(); tolerate None / non-dict values.
    if not isinstance(meta, dict):
        meta = {}

    if text:
        _log(f"LongCat served β model={meta.get('model','?')} tokens={meta.get('tokens','?')}")
        return text

    err = meta.get("error", "unknown")
    if err == "rate_limit":
        _log("LongCat quota exhausted β falling back to Qwen")
    elif err == "auth":
        _log("LongCat auth failed β falling back to Qwen")
    elif err == "cooldown":
        _log("LongCat in cooldown β falling back to Qwen")
    else:
        _log(f"LongCat error ({err}) β falling back to Qwen")

    # Only the backend call itself is guarded; logging stays outside the try.
    try:
        text = qwen_generate(messages, temperature, max_new_tokens)
    except Exception as e:
        _log(f"Qwen generation failed: {e}")
        return None

    if text:
        _log("Qwen served (fallback)")
        return text

    _log("Qwen returned None β model may not be loaded")
    return None