"""Minimal client for a LongCat OpenAI-compatible chat-completions endpoint.

Configuration is read from environment variables (optionally loaded from a
``.env`` file two directories above this module):

* ``LONGCAT_API_KEY``  - bearer token; requests are skipped when empty.
* ``LONGCAT_BASE_URL`` - API root, defaults to ``https://api.longcat.chat/openai``.
* ``LONGCAT_MODEL``    - model name, defaults to ``LongCat-2.0-Preview``.
"""

import os
import json
import time
from pathlib import Path

import requests
from dotenv import load_dotenv

env_path = Path(__file__).resolve().parent.parent / ".env"
load_dotenv(dotenv_path=env_path)

API_KEY = os.getenv("LONGCAT_API_KEY", "")
BASE_URL = os.getenv("LONGCAT_BASE_URL", "https://api.longcat.chat/openai")
MODEL = os.getenv("LONGCAT_MODEL", "LongCat-2.0-Preview")
TIMEOUT = 60  # seconds, passed to requests.post

# Shortest pause applied after a 429, and fallback when Retry-After is unusable.
_MIN_COOLDOWN = 60
# Pause applied after an authentication/authorization failure (401/403).
_AUTH_COOLDOWN = 3600

# Unix timestamp before which generate() refuses to call the API.
_cooldown_until = 0.0


def _log(msg: str) -> None:
    """Print *msg* to stdout with a ``[LongCat]`` prefix."""
    print(f"[LongCat] {msg}")


def generate(messages, temperature: float = 0.3, max_new_tokens: int = 2000):
    """Send a chat-completion request and return ``(text, meta)``.

    Args:
        messages: Value sent verbatim as the ``messages`` field of the request
            payload (presumably an OpenAI-style list of role/content dicts;
            verify against callers).
        temperature: Sampling temperature sent in the payload.
        max_new_tokens: Sent as ``max_tokens`` in the payload.

    Returns:
        A 2-tuple. On success: ``(text, meta)`` where ``meta`` has the keys
        ``tokens``, ``prompt_tokens``, ``completion_tokens`` and ``model``.
        On failure: ``(None, {"error": code, ...})`` where ``code`` is one of
        ``cooldown``, ``no_key``, ``timeout``, ``connection``, ``request``,
        ``parse``, ``rate_limit`` (with an extra ``retry_after`` key),
        ``auth`` or ``http_<status>``.

    This function does not raise for network or HTTP errors; they are logged
    and reported through the returned ``meta`` dict. A 429 or 401/403 response
    starts a module-wide cooldown during which calls return immediately.
    """
    global _cooldown_until

    now = time.time()
    if now < _cooldown_until:
        _log(f"Cooldown active ({int(_cooldown_until - now)}s remaining), skipping")
        return None, {"error": "cooldown"}

    if not API_KEY:
        _log("No API key configured (LONGCAT_API_KEY)")
        return None, {"error": "no_key"}

    url = f"{BASE_URL.rstrip('/')}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
    except requests.exceptions.Timeout:
        _log(f"Timeout after {TIMEOUT}s")
        return None, {"error": "timeout"}
    except requests.exceptions.ConnectionError as e:
        _log(f"Connection failed: {e}")
        return None, {"error": "connection"}
    except requests.exceptions.RequestException as e:
        _log(f"Request failed: {e}")
        return None, {"error": "request"}

    if resp.status_code == 200:
        try:
            body = resp.json()
            choice = body["choices"][0]
            text = choice["message"]["content"]
            # "usage" may be absent or explicitly null; treat both as empty.
            usage = body.get("usage") or {}
            meta = {
                "tokens": usage.get("total_tokens", 0),
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "model": body.get("model", MODEL),
            }
        except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and the simplejson variant
            # that requests may raise; TypeError/AttributeError cover null or
            # wrongly-typed fields (e.g. "choices": null, body being a list).
            _log(f"Failed to parse response: {e}")
            _log(f"Raw: {resp.text[:300]}")
            return None, {"error": "parse"}
        _log(
            f"OK — {meta['prompt_tokens']} in / {meta['completion_tokens']} out"
            f" / {meta['tokens']} total"
        )
        return text, meta

    if resp.status_code == 429:
        retry_after = resp.headers.get("Retry-After", "60")
        try:
            retry_after = int(retry_after)
        except ValueError:
            # Non-integer values (e.g. the HTTP-date form) fall back to 60s.
            retry_after = _MIN_COOLDOWN
        cooldown = max(retry_after, _MIN_COOLDOWN)
        # Measure from the moment the response is handled, not from before the
        # request was sent: a slow request would otherwise eat into (or fully
        # consume) the cooldown window.
        _cooldown_until = time.time() + cooldown
        _log(f"Quota exhausted (429). Cooldown {cooldown}s. Response: {resp.text[:200]}")
        return None, {"error": "rate_limit", "retry_after": retry_after}

    if resp.status_code in (401, 403):
        _log(f"Auth failed ({resp.status_code}). Disabling LongCat. Response: {resp.text[:200]}")
        _cooldown_until = time.time() + _AUTH_COOLDOWN
        return None, {"error": "auth"}

    _log(f"Error {resp.status_code}: {resp.text[:300]}")
    return None, {"error": f"http_{resp.status_code}"}