Spaces:
Sleeping
Sleeping
| from qdrant_client.http import models | |
| from utils.config import settings | |
| from sentence_transformers import SentenceTransformer | |
| from services.qdrant import get_qdrant_client | |
class SearchService:
    """Scoped vector search over a Qdrant collection.

    A query string is embedded with a SentenceTransformer model and sent to
    the collection named by ``settings.QDRANT_COLLECTION_NAME``.
    """

    # Maps each supported ``scope`` key to the payload field it filters on.
    _SCOPE_FIELDS = (
        ("article_numbers", "article_number"),
        ("parts", "affected_part"),
        ("amendment_ids", "amendment_number"),
    )

    def __init__(self):
        self.client = get_qdrant_client()
        # The query text has to be vectorized before it can be searched.
        # NOTE(review): presumably the collection was indexed with this same
        # model -- confirm against the ingestion code.
        self.encoder = SentenceTransformer('BAAI/bge-small-en-v1.5')
        self.collection_name = settings.QDRANT_COLLECTION_NAME

    def _build_filter(self, scope):
        """Translate ``scope`` into a Qdrant filter, or None when unscoped.

        Each non-empty scope entry becomes a "match any of these values"
        condition on its payload field. The conditions are combined with
        ``should`` (logical OR): a chunk qualifies if it belongs to any of
        the requested articles, parts, or amendments.
        """
        if not scope:
            return None

        conditions = []
        for scope_key, payload_field in self._SCOPE_FIELDS:
            values = scope.get(scope_key)
            if not values:
                continue
            # Tolerate a single value passed where a list is expected.
            if isinstance(values, (str, int)):
                values = [values]
            conditions.append(models.FieldCondition(
                key=payload_field,
                match=models.MatchAny(any=list(values)),
            ))

        if not conditions:
            return None
        return models.Filter(should=conditions)

    def search(self, query: str, scope: dict = None, limit: int = 15) -> list:
        """Perform a scoped vector search.

        Args:
            query: Text to embed and search for.
            scope: Optional dict of filters. Recognized keys are
                'article_numbers', 'parts' and 'amendment_ids'. A chunk
                matching any one of the supplied filters is eligible.
                None, an empty dict, or only empty values means no filter.
            limit: Maximum number of hits to request.

        Returns:
            A list of dicts with keys 'id', 'score', 'text', 'metadata' and
            'full_chunk' (the last two both hold the point payload). An
            empty list is returned when the Qdrant query fails.
        """
        vector = self.encoder.encode(query).tolist()
        query_filter = self._build_filter(scope)

        try:
            # query_points is used because `search` is missing in this
            # client instance/version.
            hits = self.client.query_points(
                collection_name=self.collection_name,
                query=vector,
                query_filter=query_filter,
                limit=limit,
            ).points
        except Exception as e:
            # Best-effort: callers get "no results" rather than an exception.
            print(f"Qdrant Search Error: {e}")
            if isinstance(e, AttributeError):
                # Most likely a client API mismatch; show what is available.
                print(f"Client attributes: {dir(self.client)}")
            return []

        chunks = []
        for hit in hits:
            # A point may carry no payload at all; treat that as empty so one
            # such hit does not discard the whole result set.
            payload = hit.payload or {}
            chunks.append({
                "id": hit.id,
                "score": hit.score,
                "text": payload.get("text", ""),
                "metadata": payload,
                "full_chunk": payload,
            })
        return chunks