| """Embedding generation via HuggingFace Inference API (no local torch needed).""" | |
| import os | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" | |
| EMBEDDING_DIM = 384 | |
| BATCH_SIZE = 96 # HF Inference API batch limit | |
| def _get_client(): | |
| """Get HuggingFace InferenceClient.""" | |
| from huggingface_hub import InferenceClient | |
| token = os.environ.get("HF_TOKEN") | |
| return InferenceClient(token=token) | |
| def generate(texts: list[str]) -> list[list[float]]: | |
| """Encode texts into embedding vectors via HF Inference API.""" | |
| client = _get_client() | |
| all_embeddings = [] | |
| # Process in batches | |
| for i in range(0, len(texts), BATCH_SIZE): | |
| batch = texts[i : i + BATCH_SIZE] | |
| result = client.feature_extraction(batch, model=MODEL_NAME) | |
| all_embeddings.extend(result) | |
| logger.info("Generated %d embeddings via HF Inference API", len(all_embeddings)) | |
| return [[float(x) for x in emb] for emb in all_embeddings] | |
| def generate_query(query: str) -> list[float]: | |
| """Embed a single query string (for future RAG Engine use).""" | |
| return generate([query])[0] | |