# app/service/embedding_service.py
import logging

import requests

from app.deps import HF_API_TOKEN
class EmbeddingService:
    """Produce text embeddings with the ``all-MiniLM-L6-v2`` model.

    The hosted Hugging Face feature-extraction pipeline is tried first.
    If the HTTP call fails or returns something that is not a numeric
    vector, the same model is run locally through ``sentence_transformers``.
    """

    # Single source of truth for the model used remotely and locally.
    MODEL_NAME = "all-MiniLM-L6-v2"
    # Seconds to wait for the hosted API before giving up.
    REQUEST_TIMEOUT = 30

    # Local model, loaded on first fallback and then reused by the instance.
    _local_model = None

    def __init__(self):
        """Set up the endpoint URL and the bearer-token request header."""
        self.api_url = (
            "https://api-inference.huggingface.co/pipeline/feature-extraction/"
            f"sentence-transformers/{self.MODEL_NAME}"
        )
        self.headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

    def generate(self, text: str) -> list[float]:
        """Return the embedding of *text* as a flat list of floats.

        Uses the HF hosted API (noted by the original author as the free
        tier, 10k/day). On a transport/HTTP error, undecodable JSON, or a
        response that is not a numeric vector, falls back to the local model.

        Args:
            text: The string to embed.

        Returns:
            The embedding vector.
        """
        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json={"inputs": text, "options": {"wait_for_model": True}},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            # Validate before returning so an error body is never passed
            # downstream as if it were an embedding.
            return self._as_vector(response.json())
        except (requests.RequestException, ValueError) as exc:
            logging.getLogger(__name__).warning(
                "HF API failed, using local fallback: %s", exc
            )
        return self._local_fallback(text)

    @staticmethod
    def _as_vector(payload: object) -> list[float]:
        """Coerce a decoded API response into a flat list of floats.

        Args:
            payload: The JSON-decoded response body.

        Returns:
            The vector with every element converted to ``float``.

        Raises:
            ValueError: If *payload* is not a non-empty list of numbers
                (optionally wrapped in a single-row outer list).
        """
        row = payload
        # Tolerate a one-row batch wrapper such as [[...]] around the vector.
        if isinstance(row, list) and len(row) == 1 and isinstance(row[0], list):
            row = row[0]
        if not isinstance(row, list) or not row:
            raise ValueError(
                f"unexpected embedding payload: {type(payload).__name__}"
            )
        # bool is a subclass of int, so it has to be excluded explicitly.
        if not all(
            isinstance(v, (int, float)) and not isinstance(v, bool) for v in row
        ):
            raise ValueError("embedding payload contains non-numeric values")
        return [float(v) for v in row]

    def _local_fallback(self, text: str) -> list[float]:
        """Embed *text* with a locally loaded model (slower but reliable).

        The model is constructed on the first call and cached on the
        instance, so later fallbacks do not reload it.
        """
        if self._local_model is None:
            # Imported lazily: only needed when the hosted API is unavailable.
            from sentence_transformers import SentenceTransformer

            self._local_model = SentenceTransformer(self.MODEL_NAME)
        return self._local_model.encode(text).tolist()
# Module-level instance, constructed once when this module is imported.
embedder = EmbeddingService()