| import os |
| import json |
| import time |
| import requests |
| from pathlib import Path |
| from dotenv import load_dotenv |
|
|
# Load variables from the ".env" file that sits one directory above the
# directory containing this module, before any setting below is read.
env_path = Path(__file__).resolve().parent.parent / ".env"
load_dotenv(dotenv_path=env_path)


# Connection settings, each overridable through the environment.
API_KEY = os.environ.get("LONGCAT_API_KEY", "")  # empty string means "not configured"
BASE_URL = os.environ.get("LONGCAT_BASE_URL", "https://api.longcat.chat/openai")
MODEL = os.environ.get("LONGCAT_MODEL", "LongCat-2.0-Preview")
TIMEOUT = 60  # seconds; passed as the `timeout` of the HTTP request in generate()


# Unix timestamp (as returned by time.time()) until which generate() refuses
# to send requests; 0.0 means no cooldown is in effect.
_cooldown_until = 0.0
|
|
|
|
def _log(msg: str):
    """Print *msg* to standard output, prefixed with the ``[LongCat]`` tag."""
    line = "[LongCat] {}".format(msg)
    print(line)
|
|
|
|
def generate(messages, temperature=0.3, max_new_tokens=2000):
    """Send a chat-completion request and return ``(text, meta)``.

    The request is POSTed to ``{BASE_URL}/v1/chat/completions`` with a bearer
    token taken from ``API_KEY``. This function never raises for transport,
    HTTP or response-parsing failures; it reports them through ``meta``.

    Args:
        messages: Value sent as the ``messages`` field of the JSON payload.
        temperature: Value sent as the ``temperature`` field.
        max_new_tokens: Value sent as the ``max_tokens`` field.

    Returns:
        A 2-tuple ``(text, meta)``.

        On success ``text`` is ``choices[0].message.content`` from the
        response and ``meta`` holds ``tokens``, ``prompt_tokens``,
        ``completion_tokens`` and ``model``.

        On failure ``text`` is ``None`` and ``meta["error"]`` is one of:
        ``"cooldown"``, ``"no_key"``, ``"timeout"``, ``"connection"``,
        ``"request"``, ``"parse"``, ``"rate_limit"``, ``"auth"`` or
        ``"http_<status>"``. For ``"rate_limit"`` the meta also carries
        ``retry_after`` (the server's hint in seconds, 60 if absent or
        unparsable) and ``cooldown`` (the number of seconds this module will
        actually skip requests for).

    Side effects:
        Updates the module-level ``_cooldown_until`` on HTTP 429 (at least
        60 seconds) and on HTTP 401/403 (one hour), and logs via ``_log``.
    """
    global _cooldown_until

    # Refuse early while a previous rate-limit/auth cooldown is still running.
    now = time.time()
    if now < _cooldown_until:
        _log(f"Cooldown active ({int(_cooldown_until - now)}s remaining), skipping")
        return None, {"error": "cooldown"}

    if not API_KEY:
        _log("No API key configured (LONGCAT_API_KEY)")
        return None, {"error": "no_key"}

    url = f"{BASE_URL.rstrip('/')}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
    except requests.exceptions.Timeout:
        _log(f"Timeout after {TIMEOUT}s")
        return None, {"error": "timeout"}
    except requests.exceptions.ConnectionError as e:
        _log(f"Connection failed: {e}")
        return None, {"error": "connection"}
    except requests.exceptions.RequestException as e:
        _log(f"Request failed: {e}")
        return None, {"error": "request"}

    status = resp.status_code

    if status == 200:
        try:
            body = resp.json()
            text = body["choices"][0]["message"]["content"]
            # `usage` may be missing or explicitly null in the response.
            usage = body.get("usage") or {}
            meta = {
                "tokens": usage.get("total_tokens", 0),
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "model": body.get("model", MODEL),
            }
        except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
            # ValueError covers every JSON decode error variant (stdlib json,
            # simplejson, requests' own); TypeError/AttributeError cover null
            # or wrongly-typed fields in an otherwise valid JSON body.
            _log(f"Failed to parse response: {e}")
            _log(f"Raw: {resp.text[:300]}")
            return None, {"error": "parse"}
        _log(f"OK — {meta['prompt_tokens']} in / {meta['completion_tokens']} out / {meta['tokens']} total")
        return text, meta

    if status == 429:
        try:
            retry_after = int(resp.headers.get("Retry-After", "60"))
        except ValueError:
            # Non-integer header value: fall back to the default.
            retry_after = 60
        # Never cool down for less than a minute, whatever the server hints.
        cooldown = max(retry_after, 60)
        # Anchor on the current time, not on `now`: the request itself may
        # have taken up to TIMEOUT seconds since `now` was captured.
        _cooldown_until = time.time() + cooldown
        _log(f"Quota exhausted (429). Cooldown {cooldown}s. Response: {resp.text[:200]}")
        return None, {"error": "rate_limit", "retry_after": retry_after, "cooldown": cooldown}

    if status in (401, 403):
        _log(f"Auth failed ({status}). Disabling LongCat. Response: {resp.text[:200]}")
        # Credentials will not fix themselves; back off for an hour.
        _cooldown_until = time.time() + 3600
        return None, {"error": "auth"}

    _log(f"Error {status}: {resp.text[:300]}")
    return None, {"error": f"http_{status}"}
|
|