Spaces:
Sleeping
Sleeping
File size: 1,704 Bytes
85de68f 1118099 85de68f 162bc67 85de68f e256fc1 1118099 85de68f 1118099 85de68f e256fc1 85de68f e256fc1 85de68f 1118099 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# API_model.py
import os
import numpy as np
from huggingface_hub import InferenceClient
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
HF_TOKEN = os.getenv("HF_TOKEN") # set in env/Space Variables
MAX_LEN = int(os.getenv("MAX_TEXT_LEN", "20000"))
REQ_TIMEOUT = float(os.getenv("REQ_TIMEOUT", "40"))
_hf = InferenceClient(model=EMBED_MODEL, token=HF_TOKEN, timeout=REQ_TIMEOUT)
def _trim(s: str) -> str:
    """Strip surrounding whitespace and cap the text at MAX_LEN characters."""
    cleaned = s.strip() if s else ""
    # Slicing is a no-op when the string is already short enough.
    return cleaned[:MAX_LEN]
def _cosine(a: np.ndarray, b: np.ndarray) -> float:
denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
return float(np.dot(a, b) / denom)
def _embed_api(text: str) -> np.ndarray:
    """Embed *text* via the HF Inference API; return a 1-D float32 vector.

    Raises:
        RuntimeError: when HF_TOKEN is missing, or when the API response has
            an unexpected shape. Client-side auth/model/timeout errors from
            the InferenceClient propagate unchanged.
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN is not set (add it in environment or Space → Settings → Variables).")
    # The client call may raise on auth failures, missing models, or timeouts.
    vec = np.asarray(_hf.feature_extraction(_trim(text)), dtype=np.float32)
    if vec.ndim == 2:
        # Token-level output: mean-pool over tokens to collapse to one vector.
        vec = vec.mean(axis=0)
    if vec.ndim != 1:
        raise RuntimeError(f"Unexpected embedding shape from the Inference API: {vec.shape}")
    return vec
def calculate_similarity_api(text_a: str, text_b: str) -> float:
    """Return cosine similarity of two texts scaled to a percentage, 2 dp."""
    vec_a = _embed_api(text_a)
    vec_b = _embed_api(text_b)
    percent = 100.0 * _cosine(vec_a, vec_b)
    return float(np.round(percent, 2))
def check_api_health() -> tuple[bool, str]:
    """Probe the embedding endpoint once; return (ok, human-readable detail)."""
    try:
        _embed_api("healthcheck")
    except Exception as e:  # broad on purpose: any failure must reach the UI
        return False, f"{type(e).__name__}: {e}"
    return True, f"OK (model={EMBED_MODEL})"
|