Spaces:
Sleeping
Sleeping
| """ | |
| Knowledge retrieval services: L0 (similar document chunks) and L1 (global-bio shades). | |
| """ | |
| import logging | |
| from typing import List, Tuple, Dict, Any, Optional | |
| from lpm_kernel.file_data.embedding_service import EmbeddingService, ChunkDTO | |
| from lpm_kernel.kernel.l1.l1_manager import get_latest_global_bio | |
| logger = logging.getLogger(__name__) | |
class L0KnowledgeRetriever:
    """Retrieve L0 knowledge: stored chunks that are similar to a query.

    The similarity search itself is delegated to the embedding service; this
    class only filters the hits by similarity and merges their text.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_chunks: int = 3,
    ):
        """Initialise the L0 knowledge retriever.

        Args:
            embedding_service: Embedding service instance used for the search.
            similarity_threshold: Only chunks whose similarity is greater than
                or equal to this value are returned.
            max_chunks: Maximum number of chunks requested from the search.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_chunks = max_chunks

    def retrieve(self, query: str) -> str:
        """Retrieve L0 knowledge for ``query``.

        Args:
            query: Query content.

        Returns:
            The contents of the sufficiently similar chunks joined by blank
            lines, or an empty string when the query is blank, nothing
            relevant is found, or the retrieval fails (failures are logged,
            never raised).
        """
        try:
            # A blank query cannot match anything meaningful; skip the search.
            if not query or not query.strip():
                return ""

            similar_chunks: List[Tuple[ChunkDTO, float]] = (
                self.embedding_service.search_similar_chunks(
                    query=query, limit=self.max_chunks
                )
            )
            if not similar_chunks:
                return ""

            # Keep only hits above the threshold. Chunks without content are
            # skipped so that a single empty/None content cannot make the
            # join fail and discard every other relevant chunk.
            knowledge_parts = [
                chunk.content
                for chunk, similarity in similar_chunks
                if similarity >= self.similarity_threshold and chunk.content
            ]

            # Joining an empty list yields "", the documented "nothing found" value.
            return "\n\n".join(knowledge_parts)
        except Exception as e:
            # Best-effort API: log with traceback and degrade to "no knowledge".
            logger.exception("L0 knowledge retrieval failed: %s", e)
            return ""
class L1KnowledgeRetriever:
    """Retrieve L1 knowledge: the global-bio shades most similar to a query.

    Each shade is read as a mapping with ``title`` and ``description`` keys;
    its text is embedded and compared with the query embedding through the
    embedding service.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_shades: int = 3,
    ):
        """Initialise the L1 knowledge retriever.

        Args:
            embedding_service: Embedding service instance.
            similarity_threshold: Only shades whose similarity is greater than
                or equal to this value are returned.
            max_shades: Maximum number of shades returned.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_shades = max_shades

    @staticmethod
    def _is_usable(embedding: Any) -> bool:
        """Return True when ``embedding`` is a non-empty vector.

        A plain truthiness test raises for multi-element array-like vectors
        (e.g. NumPy arrays), so the length is checked explicitly. Values
        without a length fall back to ordinary truthiness.
        """
        if embedding is None:
            return False
        try:
            return len(embedding) > 0
        except TypeError:
            return bool(embedding)

    def retrieve(self, query: str) -> str:
        """Search the L1 shades related to ``query``.

        Args:
            query: Query content.

        Returns:
            One "Shade / Description / Similarity" section per matching shade,
            ordered by descending similarity and separated by blank lines, or
            an empty string when the query is blank, nothing relevant is
            found, or the retrieval fails (failures are logged, never raised).
        """
        try:
            # A blank query cannot match anything meaningful; skip all lookups.
            if not query or not query.strip():
                return ""

            global_bio = get_latest_global_bio()
            if not global_bio or not global_bio.shades:
                logger.info("Global Bio not found or Shades is empty")
                return ""

            query_embedding = self.embedding_service.get_embedding(query)
            if not self._is_usable(query_embedding):
                logger.error("Failed to get embedding for query text")
                return ""

            # Embed every shade and score it against the query in one pass.
            embedded_count = 0
            similar_shades = []
            for shade in global_bio.shades:
                shade_text = (
                    f"{shade.get('title', '')} - {shade.get('description', '')}"
                )
                embedding = self.embedding_service.get_embedding(shade_text)
                if not self._is_usable(embedding):
                    continue
                embedded_count += 1
                similarity = self.embedding_service.calculate_similarity(
                    query_embedding, embedding
                )
                if similarity >= self.similarity_threshold:
                    similar_shades.append((shade, similarity))

            if not embedded_count:
                logger.info("No available Shades embeddings found")
                return ""

            # Best matches first, limited to the configured number of shades.
            similar_shades.sort(key=lambda item: item[1], reverse=True)
            top_shades = similar_shades[: self.max_shades]

            # Structured output; an empty selection joins to "".
            return "\n\n".join(
                f"Shade: {shade.get('title', '')}\n"
                f"Description: {shade.get('description', '')}\n"
                f"Similarity: {similarity:.2f}"
                for shade, similarity in top_shades
            )
        except Exception as e:
            # Best-effort API: log with traceback and degrade to "no knowledge".
            logger.exception("L1 knowledge retrieval failed: %s", e)
            return ""
# Module-level default retrievers, created once at import time.
# Each retriever is given its own EmbeddingService instance.
default_retriever = L0KnowledgeRetriever(
    embedding_service=EmbeddingService(),
    similarity_threshold=0.7,
    max_chunks=3,
)

default_l1_retriever = L1KnowledgeRetriever(
    embedding_service=EmbeddingService(),
    similarity_threshold=0.7,
    max_shades=3,
)