Spaces:
Running
Running
| """ | |
| Azure AI Foundry embedding provider. | |
| Uses text-embedding-ada-002 (or whatever deployment is configured). | |
| """ | |
| from openai import AzureOpenAI | |
| from openmark.embeddings.base import EmbeddingProvider | |
| from openmark import config | |
class AzureEmbedder(EmbeddingProvider):
    """Embedding provider backed by an Azure OpenAI embedding deployment.

    The endpoint, API key, API version and deployment name are all read
    from ``openmark.config`` when the instance is constructed.
    """

    # Dimension reported until a real response has been seen
    # (1536 is the ada-002 vector length).
    _DEFAULT_DIMENSION = 1536

    # Length of the vectors actually returned by the deployment; stays
    # ``None`` until the first successful embedding call.
    _dimension = None

    def __init__(self):
        """Create the Azure OpenAI client and remember the deployment name."""
        self._client = AzureOpenAI(
            azure_endpoint=config.AZURE_ENDPOINT,
            api_key=config.AZURE_API_KEY,
            api_version=config.AZURE_API_VERSION,
        )
        self._deployment = config.AZURE_DEPLOYMENT_EMBED
        print(f"Azure embedder ready — deployment: {self._deployment}")

    def _embed(self, texts: list[str]) -> list[list[float]]:
        """Embed ``texts`` with a single request to the configured deployment.

        Args:
            texts: The strings to embed, sent as one request.

        Returns:
            One embedding vector per item in the response data, in the
            order the response lists them.
        """
        response = self._client.embeddings.create(
            input=texts,
            model=self._deployment,
        )
        vectors = [item.embedding for item in response.data]
        if vectors:
            # Record the real vector length so ``dimension()`` stays correct
            # when the configured deployment is not ada-002.
            self._dimension = len(vectors[0])
        return vectors

    def embed_documents(
        self, texts: list[str], *, batch_size: int = 100
    ) -> list[list[float]]:
        """Embed ``texts`` in consecutive batches and return all vectors.

        Args:
            texts: The documents to embed. An empty list yields an empty
                result without any request being made.
            batch_size: Maximum number of texts sent per request. Defaults
                to 100. NOTE(review): the service may cap inputs per
                request; confirm the limit for the deployed model.

        Returns:
            The embedding vectors of every batch, concatenated in input
            order.

        Raises:
            ValueError: If ``batch_size`` is not a positive integer.
        """
        if batch_size <= 0:
            # A negative step would make ``range`` empty and silently return
            # no embeddings at all; fail loudly instead.
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        total = len(texts)
        results: list[list[float]] = []
        for start in range(0, total, batch_size):
            stop = start + batch_size
            results.extend(self._embed(texts[start:stop]))
            # Progress line: count is clamped so the final batch never
            # reports more items than exist.
            print(f" Azure embedded {min(stop, total)}/{total}")
        return results

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string and return its vector."""
        return self._embed([text])[0]

    def dimension(self) -> int:
        """Return the embedding vector length.

        Reports the length observed in the most recent non-empty response
        when one is available; otherwise falls back to 1536, the ada-002
        dimension.
        """
        if self._dimension is not None:
            return self._dimension
        return self._DEFAULT_DIMENSION