yuvrajsingh6
fix: Fix module imports and paths for HF Spaces
4ba4b25
from typing import List, Dict, Optional
from backend.app.services.embeddings import embedding_service
from backend.app.services.vector_store import vector_store
from backend.app.services.web_search import web_search_service
from backend.app.models.schemas import Source, SourceType, QueryMode
class RetrieverService:
async def retrieve(
self,
query: str,
mode: QueryMode,
top_k: int = 5,
document_ids: Optional[List[str]] = None,
) -> List[Source]:
if mode == QueryMode.PDF:
return await self._retrieve_from_pdf(query, top_k, document_ids or [])
elif mode == QueryMode.WEB:
return await self._retrieve_from_web(query, top_k)
elif mode == QueryMode.HYBRID:
# Get PDF sources
pdf_sources = await self._retrieve_from_pdf(
query, top_k // 2, document_ids or []
)
# Get web sources
web_sources = await self._retrieve_from_web(query, top_k // 2)
# Combine and rerank
return self._merge_and_rerank(pdf_sources + web_sources, top_k)
elif mode == QueryMode.RESTRICTED:
return await self._retrieve_from_pdf(query, top_k, document_ids or [])
async def _retrieve_from_pdf(
self, query: str, top_k: int, document_ids: Optional[List[str]] = None
) -> List[Source]:
query_embedding = embedding_service.embed_text(query)
results = await vector_store.search(
query_embedding=query_embedding, top_k=top_k, document_ids=document_ids
)
sources = []
for result in results:
sources.append(
Source(
type=SourceType.PDF,
content=result["text"],
reference=f"Page {result['metadata']['page_number']}",
title=f"Document {result['metadata']['document_id']}",
relevance_score=result["similarity"],
)
)
return sources
async def _retrieve_from_web(self, query: str, top_k: int) -> List[Source]:
results = await web_search_service.search(query, max_results=top_k)
sources = []
for result in results:
sources.append(
Source(
type=SourceType.WEB,
content=result["snippet"],
reference=result["url"],
title=result["title"],
relevance_score=result.get("score", 0.8),
)
)
return sources
def _merge_and_rerank(self, sources: List[Source], top_k: int) -> List[Source]:
sorted_sources = sorted(
sources, key=lambda x: x.relevance_score or 0, reverse=True
)
return sorted_sources[:top_k]
retriever_service = RetrieverService()