"""
service about knowledge retrieve
"""
import logging
from typing import List, Tuple, Dict, Any, Optional
from lpm_kernel.file_data.embedding_service import EmbeddingService, ChunkDTO
from lpm_kernel.kernel.l1.l1_manager import get_latest_global_bio
logger = logging.getLogger(__name__)
class L0KnowledgeRetriever:
    """L0 knowledge retriever.

    Retrieves document chunks relevant to a query via vector similarity
    search and merges them into a single knowledge string.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_chunks: int = 3,
    ):
        """
        Initialize the L0 knowledge retriever.

        Args:
            embedding_service: Embedding service instance used for similarity search.
            similarity_threshold: Only chunks whose similarity is at or above
                this value are included in the result.
            max_chunks: Maximum number of chunks to retrieve.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_chunks = max_chunks

    def retrieve(self, query: str) -> str:
        """
        Retrieve L0 knowledge for a query.

        Args:
            query: Query text.

        Returns:
            str: Relevant chunk contents joined by blank lines, or an empty
            string when nothing relevant is found or retrieval fails.
        """
        try:
            # Search for the chunks most similar to the query.
            similar_chunks: List[
                Tuple[ChunkDTO, float]
            ] = self.embedding_service.search_similar_chunks(
                query=query, limit=self.max_chunks
            )
            # Keep only chunks that clear the similarity threshold
            # (`or []` also guards against a None/empty result set).
            knowledge_parts = [
                chunk.content
                for chunk, similarity in similar_chunks or []
                if similarity >= self.similarity_threshold
            ]
            if not knowledge_parts:
                return ""
            # Merge the surviving parts into one knowledge string.
            return "\n\n".join(knowledge_parts)
        except Exception:
            # Best-effort retrieval: log with traceback and degrade to "".
            logger.exception("L0 knowledge retrieval failed")
            return ""
class L1KnowledgeRetriever:
    """L1 knowledge retriever.

    Matches a query against the shades of the latest global bio and
    returns the most similar ones as structured text.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_shades: int = 3,
    ):
        """
        Initialize the L1 knowledge retriever.

        Args:
            embedding_service: Embedding service instance.
            similarity_threshold: Only shades whose similarity is at or above
                this value are returned.
            max_shades: Maximum number of shades to return.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_shades = max_shades

    def retrieve(self, query: str) -> str:
        """
        Search L1 shades related to the query.

        Args:
            query: Query text.

        Returns:
            str: Structured shade sections, or an empty string when no
            relevant shade is found or retrieval fails.
        """
        try:
            # The global bio must exist and carry at least one shade.
            global_bio = get_latest_global_bio()
            if not global_bio or not global_bio.shades:
                logger.info("Global Bio not found or Shades is empty")
                return ""

            # Embed the query once; bail out if the service fails.
            query_embedding = self.embedding_service.get_embedding(query)
            if not query_embedding:
                logger.error("Failed to get embedding for query text")
                return ""

            # Embed every shade as "title - description", skipping failures.
            embedded_shades = []
            for shade in global_bio.shades:
                shade_text = f"{shade.get('title', '')} - {shade.get('description', '')}"
                vector = self.embedding_service.get_embedding(shade_text)
                if vector:
                    embedded_shades.append((shade, vector))
            if not embedded_shades:
                logger.info("No available Shades embeddings found")
                return ""

            # Score each embedded shade and keep those above the threshold.
            scored = []
            for shade, vector in embedded_shades:
                score = self.embedding_service.calculate_similarity(
                    query_embedding, vector
                )
                if score >= self.similarity_threshold:
                    scored.append((shade, score))

            # Highest similarity first, capped at max_shades.
            scored.sort(key=lambda pair: pair[1], reverse=True)
            top_shades = scored[: self.max_shades]
            if not top_shades:
                return ""

            # Render each surviving shade as a small structured section.
            sections = [
                f"Shade: {shade.get('title', '')}\n"
                f"Description: {shade.get('description', '')}\n"
                f"Similarity: {score:.2f}"
                for shade, score in top_shades
            ]
            return "\n\n".join(sections)
        except Exception as e:
            logger.error(f"L1 knowledge retrieval failed: {str(e)}")
            return ""
# Module-level default retriever instances, constructed at import time.
default_retriever = L0KnowledgeRetriever(
    embedding_service=EmbeddingService(), similarity_threshold=0.7, max_chunks=3
)
# NOTE(review): a second, separate EmbeddingService is constructed here —
# confirm the service is cheap to build twice, or consider sharing one instance.
default_l1_retriever = L1KnowledgeRetriever(
    embedding_service=EmbeddingService(), similarity_threshold=0.7, max_shades=3
)