ConstitutionAgent / core /search.py
Meshyboi's picture
Upload 53 files
0cd3dc5 verified
from qdrant_client.http import models
from utils.config import settings
from sentence_transformers import SentenceTransformer
from services.qdrant import get_qdrant_client
class SearchService:
    """Scoped vector search over a Qdrant collection.

    The service embeds a free-text query with a sentence-transformers model
    and asks Qdrant for the nearest points, optionally restricted by payload
    filters derived from a ``scope`` dict.
    """

    # Maps each recognised ``scope`` key to the payload field it filters on.
    _SCOPE_FIELDS = (
        ("article_numbers", "article_number"),
        ("parts", "affected_part"),
        ("amendment_ids", "amendment_number"),
    )

    def __init__(self):
        """Create the Qdrant client, load the encoder and read the collection name."""
        self.client = get_qdrant_client()
        # We need an embedding model to vectorize the query
        self.encoder = SentenceTransformer('BAAI/bge-small-en-v1.5')
        self.collection_name = settings.QDRANT_COLLECTION_NAME

    def _build_filter(self, scope):
        """Translate ``scope`` into a Qdrant filter, or ``None`` if nothing applies.

        Each non-empty recognised key becomes a "match any of these values"
        condition. The conditions are combined with ``should`` (logical OR):
        a chunk qualifies if it belongs to any of the requested articles,
        parts or amendments, i.e. the scopes form a union.
        """
        if not scope:
            return None
        conditions = [
            models.FieldCondition(
                key=payload_field,
                match=models.MatchAny(any=scope[scope_key]),
            )
            for scope_key, payload_field in self._SCOPE_FIELDS
            if scope.get(scope_key)
        ]
        if not conditions:
            return None
        return models.Filter(should=conditions)

    def search(self, query: str, scope: dict = None, limit: int = 15) -> list:
        """Perform a scoped vector search.

        Args:
            query: Text to embed and search for.
            scope: Optional dict of filters. Recognised keys are
                'article_numbers', 'parts' and 'amendment_ids'; each value is
                a list of accepted values. Missing or empty keys are ignored.
            limit: Maximum number of hits requested from Qdrant.

        Returns:
            A list of dicts with keys 'id', 'score', 'text', 'metadata' and
            'full_chunk' (the last two both hold the hit's payload). An empty
            list is returned when the Qdrant call fails; the error is printed
            rather than raised.
        """
        vector = self.encoder.encode(query).tolist()
        query_filter = self._build_filter(scope)

        try:
            # Use query_points method as search is missing in this client instance/version
            hits = self.client.query_points(
                collection_name=self.collection_name,
                query=vector,
                query_filter=query_filter,
                limit=limit,
            ).points
        except Exception as e:
            # Best-effort contract: report the failure and return no results.
            print(f"Qdrant Search Error: {e}")
            # Diagnostic aid for client-version mismatches around `search`.
            if "'search'" in str(e):
                print(f"Client attributes: {dir(self.client)}")
            return []

        chunks = []
        for hit in hits:
            # A point stored without payload comes back with payload=None;
            # treat it as empty instead of failing the whole result set.
            payload = hit.payload or {}
            chunks.append({
                "id": hit.id,
                "score": hit.score,
                "text": payload.get("text", ""),
                "metadata": payload,
                "full_chunk": payload,
            })
        return chunks