File size: 3,998 Bytes
399b80c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""Embedding generation via OpenAI-compatible API."""

from __future__ import annotations

import json
import logging
import math
import os
import urllib.request
from typing import List, Optional, Tuple

# Module logger; registered under the fixed name "openspace.cloud".
logger = logging.getLogger("openspace.cloud")

# Constants (duplicated here to avoid top-level import of skill_ranker)
# Model identifier sent as the "model" field of every embedding request.
SKILL_EMBEDDING_MODEL = "openai/text-embedding-3-small"
# Default character cap applied by build_skill_embedding_text().
SKILL_EMBEDDING_MAX_CHARS = 12_000
# NOTE(review): presumably the vector length produced by the model above; it is
# not referenced in the visible part of this module — confirm against skill_ranker.
SKILL_EMBEDDING_DIMENSIONS = 1536

# API base URLs (no trailing slash); "/embeddings" is appended when a request is built.
# Used when OPENROUTER_API_KEY is set.
_OPENROUTER_BASE = "https://openrouter.ai/api/v1"
# Fallback when OPENAI_BASE_URL is not set.
_OPENAI_BASE = "https://api.openai.com/v1"


def _openai_base_url() -> str:
    """Return ``OPENAI_BASE_URL`` without trailing slashes, or the default base.

    A variable that is set but blank (or just ``"/"``) would otherwise produce
    an empty base and a request to the bare path ``/embeddings``; treat it the
    same as unset.
    """
    configured = os.environ.get("OPENAI_BASE_URL", "").strip().rstrip("/")
    return configured or _OPENAI_BASE


def resolve_embedding_api() -> Tuple[Optional[str], str]:
    """Resolve API key and base URL for embedding requests.

    Priority:
      1. ``OPENROUTER_API_KEY`` → OpenRouter base URL
      2. ``OPENAI_API_KEY`` + ``OPENAI_BASE_URL`` (default ``api.openai.com``)
      3. host-agent config (nanobot / openclaw)

    A blank ``OPENAI_BASE_URL`` is treated as unset and falls back to the
    default OpenAI base URL.

    Returns:
        ``(api_key, base_url)`` — *api_key* may be ``None`` when no key is found.
    """
    or_key = os.environ.get("OPENROUTER_API_KEY")
    if or_key:
        return or_key, _OPENROUTER_BASE

    oa_key = os.environ.get("OPENAI_API_KEY")
    if oa_key:
        return oa_key, _openai_base_url()

    # Host detection is strictly best-effort: the module may be missing or its
    # config unreadable.  Never raise from here, but leave a trace for debugging
    # instead of discarding the failure silently.
    try:
        from openspace.host_detection import get_openai_api_key
        host_key = get_openai_api_key()
    except Exception as exc:
        logger.debug("Host-agent API key lookup failed: %s", exc)
        host_key = None

    if host_key:
        # Read the base URL only after host detection has run, matching the
        # original evaluation order.
        return host_key, _openai_base_url()

    return None, _OPENAI_BASE


def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    The result is ``0.0`` when the vectors differ in length, when they are
    empty, or when either one has zero magnitude.
    """
    if not (len(a) == len(b) and a):
        return 0.0

    def _magnitude(vec: List[float]) -> float:
        # Euclidean length of a single vector.
        return math.sqrt(sum(component * component for component in vec))

    inner = sum(left * right for left, right in zip(a, b))
    mag_a = _magnitude(a)
    mag_b = _magnitude(b)
    # A zero-length vector has no direction; avoid dividing by zero.
    if not (mag_a and mag_b):
        return 0.0
    return inner / (mag_a * mag_b)


def build_skill_embedding_text(
    name: str,
    description: str,
    readme_body: str,
    max_chars: int = SKILL_EMBEDDING_MAX_CHARS,
) -> str:
    """Assemble the text that gets embedded for a skill.

    The non-empty *name* and *description* are joined by a single newline to
    form a header; the header and the non-empty *readme_body* are then joined
    by a blank line.  The result is cut to at most *max_chars* characters.

    Unified strategy matching MCP search_skills and clawhub platform.
    """
    header_parts = [part for part in (name, description) if part]
    header = "\n".join(header_parts)
    sections = [section for section in (header, readme_body) if section]
    raw = "\n\n".join(sections)
    return raw if len(raw) <= max_chars else raw[:max_chars]


def generate_embedding(
    text: str,
    api_key: Optional[str] = None,
    *,
    timeout: float = 15,
) -> Optional[List[float]]:
    """Generate embedding using OpenAI-compatible API.

    When *api_key* is ``None``, credentials are resolved automatically via
    :func:`resolve_embedding_api` (``OPENROUTER_API_KEY`` → ``OPENAI_API_KEY``
    → host-agent config).

    This is a **synchronous** call (uses urllib).  In async contexts,
    wrap with ``asyncio.to_thread()``.

    Args:
        text: The text to embed.
        api_key: Explicit API key.  When provided, base URL is still resolved
                 from environment (``OPENROUTER_API_KEY`` presence determines
                 the endpoint).
        timeout: Timeout in seconds passed to ``urlopen`` (default 15).

    Returns:
        Embedding vector, or None on failure.  Failures (no API key, transport
        or HTTP error, invalid JSON, or a response without an embedding
        vector) never raise; all but the missing-key case log a warning.
    """
    # The base URL always comes from the environment, even with an explicit key.
    resolved_key, base_url = resolve_embedding_api()
    if api_key is None:
        api_key = resolved_key
    if not api_key:
        return None

    # NOTE(review): the model id carries an "openai/" prefix, which looks like
    # OpenRouter-style naming — confirm that the plain OpenAI endpoint (or a
    # custom OPENAI_BASE_URL) accepts it unchanged.
    body = json.dumps({
        "model": SKILL_EMBEDDING_MODEL,
        "input": text,
    }).encode("utf-8")

    req = urllib.request.Request(
        f"{base_url}/embeddings",
        data=body,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except Exception as e:
        # Best-effort by design: callers treat None as "no embedding available".
        logger.warning("Embedding generation failed: %s", e)
        return None

    # A successful HTTP status does not guarantee a usable payload (the body may
    # be an error object or lack the expected fields).  Validate the shape
    # explicitly so such responses are reported instead of silently becoming None.
    items = payload.get("data") if isinstance(payload, dict) else None
    first = items[0] if isinstance(items, list) and items else None
    embedding = first.get("embedding") if isinstance(first, dict) else None
    if not isinstance(embedding, list) or not embedding:
        detail = payload.get("error") if isinstance(payload, dict) else None
        logger.warning(
            "Embedding generation failed: response contained no embedding vector (%s)",
            detail if detail else "unexpected response shape",
        )
        return None
    return embedding