# key_word_Fast_API/services/model_router.py
# Commit 249b12a (ihtesham0345):
#   feat: Add LongCat API as primary model with Qwen fallback + cross-map fix
from services.longcat_client import generate as longcat_generate
from services.model_loader import generate_text as qwen_generate
def _log(msg: str):
    """Print *msg* to stdout, tagged with the ``[Router]`` prefix."""
    tagged = "[Router] {}".format(msg)
    print(tagged)
def generate_text(messages, temperature=0.3, max_new_tokens=2000):
    """Generate text with LongCat first, falling back to Qwen on failure.

    The three arguments are forwarded unchanged, positionally, to whichever
    backend is called.

    Args:
        messages: Prompt payload handed to the backends as-is.
            NOTE(review): presumably a chat-style message list; the expected
            shape is defined by the backends, not here.
        temperature: Sampling temperature passed through to the backend.
        max_new_tokens: Generation length limit passed through to the backend.

    Returns:
        The generated text from LongCat if it returned a truthy value,
        otherwise the text from Qwen if that is truthy, otherwise ``None``
        (Qwen returned nothing or raised).
    """
    # The primary call is guarded the same way as the fallback call below:
    # an exception (or a return value that does not unpack into two items)
    # must lead to the Qwen fallback rather than escape from the router.
    try:
        text, meta = longcat_generate(messages, temperature, max_new_tokens)
    except Exception as e:
        text, meta = None, {"error": f"{type(e).__name__}: {e}"}

    # Tolerate a missing meta object so the ``.get`` calls below cannot fail.
    # Judging by the keys read here, meta carries 'model'/'tokens' on success
    # and 'error' on failure.
    meta = meta or {}

    if text:
        _log(
            f"LongCat served — model={meta.get('model', '?')} "
            f"tokens={meta.get('tokens', '?')}"
        )
        return text

    err = meta.get("error", "unknown")
    known_reasons = {
        "rate_limit": "LongCat quota exhausted",
        "auth": "LongCat auth failed",
        "cooldown": "LongCat in cooldown",
    }
    # Only strings can match a known code; the isinstance check also keeps an
    # unhashable error value from breaking the dict lookup.
    reason = known_reasons.get(err) if isinstance(err, str) else None
    if reason is None:
        reason = f"LongCat error ({err})"
    _log(f"{reason} — falling back to Qwen")

    try:
        text = qwen_generate(messages, temperature, max_new_tokens)
    except Exception as e:
        _log(f"Qwen generation failed: {e}")
        return None

    if text:
        _log("Qwen served (fallback)")
        return text

    # Any falsy result (None, empty string) is reported to the caller as None.
    _log("Qwen returned None — model may not be loaded")
    return None