key_word_Fast_API / services /longcat_client.py
ihtesham0345's picture
feat: Add LongCat API as primary model with Qwen fallback + cross-map fix
249b12a
Raw
History Blame Contribute Delete
3.25 kB
import os
import json
import time
import requests
from pathlib import Path
from dotenv import load_dotenv
# Read settings from the ".env" file located two directory levels above this
# module's resolved path, then from the process environment.
env_path = Path(__file__).resolve().parent.parent.joinpath(".env")
load_dotenv(dotenv_path=env_path)

# Connection settings; each one falls back to the default shown when the
# corresponding environment variable is not set.
API_KEY = os.environ.get("LONGCAT_API_KEY", "")
BASE_URL = os.environ.get("LONGCAT_BASE_URL", "https://api.longcat.chat/openai")
MODEL = os.environ.get("LONGCAT_MODEL", "LongCat-2.0-Preview")

# Timeout, in seconds, passed to requests.post by generate().
TIMEOUT = 60

# Epoch timestamp (time.time() scale) before which generate() skips the API.
_cooldown_until = 0.0
def _log(msg: str):
    """Print ``msg`` to stdout, prefixed with the ``[LongCat]`` tag."""
    tagged = "[LongCat] {}".format(msg)
    print(tagged)
def generate(messages, temperature=0.3, max_new_tokens=2000):
    """Request a chat completion from the LongCat API.

    POSTs ``messages`` to ``{BASE_URL}/v1/chat/completions`` using the
    module-level ``API_KEY``, ``MODEL`` and ``TIMEOUT`` settings.

    Args:
        messages: Value sent as the ``"messages"`` field of the request body.
        temperature: Value sent as the ``"temperature"`` field.
        max_new_tokens: Value sent as the ``"max_tokens"`` field.

    Returns:
        A ``(text, meta)`` tuple.

        On success ``text`` is the content of the first choice and ``meta``
        holds ``"tokens"``, ``"prompt_tokens"``, ``"completion_tokens"`` and
        ``"model"``.

        On failure ``text`` is ``None`` and ``meta["error"]`` is one of
        ``"cooldown"``, ``"no_key"``, ``"timeout"``, ``"connection"``,
        ``"request"``, ``"parse"``, ``"rate_limit"``, ``"auth"`` or
        ``"http_<status>"``. A ``"rate_limit"`` result also carries
        ``"retry_after"`` (the server-provided value, or 60 when it is
        missing or not an integer) and ``"cooldown"`` (the number of seconds
        actually applied).

    Side effects:
        A 429 response sets the module-level ``_cooldown_until`` at least
        60 seconds into the future; a 401/403 response sets it 3600 seconds
        into the future. Calls made before that time return the
        ``"cooldown"`` error without contacting the API.
    """
    global _cooldown_until

    now = time.time()
    if now < _cooldown_until:
        _log(f"Cooldown active ({int(_cooldown_until - now)}s remaining), skipping")
        return None, {"error": "cooldown"}

    if not API_KEY:
        _log("No API key configured (LONGCAT_API_KEY)")
        return None, {"error": "no_key"}

    url = f"{BASE_URL.rstrip('/')}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=TIMEOUT)
    except requests.exceptions.Timeout:
        _log(f"Timeout after {TIMEOUT}s")
        return None, {"error": "timeout"}
    except requests.exceptions.ConnectionError as e:
        _log(f"Connection failed: {e}")
        return None, {"error": "connection"}
    except requests.exceptions.RequestException as e:
        _log(f"Request failed: {e}")
        return None, {"error": "request"}

    if resp.status_code == 200:
        try:
            body = resp.json()
            text = body["choices"][0]["message"]["content"]
            if text is None:
                # A null content would otherwise be returned as (None, meta)
                # with no "error" key; report it as a malformed response.
                raise ValueError("message content is null")
            # "usage" may be absent or null; treat both as empty.
            usage = body.get("usage") or {}
            meta = {
                "tokens": usage.get("total_tokens", 0),
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "model": body.get("model", MODEL),
            }
        except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
            # ValueError covers json.JSONDecodeError as well as decode errors
            # from alternative JSON backends; TypeError/AttributeError cover
            # fields that are present but null or of an unexpected type.
            _log(f"Failed to parse response: {e}")
            _log(f"Raw: {resp.text[:300]}")
            return None, {"error": "parse"}
        _log(f"OK — {meta['prompt_tokens']} in / {meta['completion_tokens']} out / {meta['tokens']} total")
        return text, meta

    if resp.status_code == 429:
        try:
            retry_after = int(resp.headers.get("Retry-After", "60"))
        except ValueError:
            # Retry-After is missing a usable integer (e.g. an HTTP-date).
            retry_after = 60
        cooldown = max(retry_after, 60)
        # Re-read the clock: `now` was captured before the request, which may
        # have blocked for up to TIMEOUT seconds.
        _cooldown_until = time.time() + cooldown
        _log(f"Quota exhausted (429). Cooldown {cooldown}s. Response: {resp.text[:200]}")
        return None, {
            "error": "rate_limit",
            "retry_after": retry_after,
            "cooldown": cooldown,
        }

    if resp.status_code in (401, 403):
        _log(f"Auth failed ({resp.status_code}). Disabling LongCat. Response: {resp.text[:200]}")
        # Measure the hour from now rather than from before the request.
        _cooldown_until = time.time() + 3600
        return None, {"error": "auth"}

    _log(f"Error {resp.status_code}: {resp.text[:300]}")
    return None, {"error": f"http_{resp.status_code}"}