File size: 5,520 Bytes
01d5a5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
service about knowledge retrieve
"""
import logging
from typing import List, Tuple, Dict, Any, Optional
from lpm_kernel.file_data.embedding_service import EmbeddingService, ChunkDTO
from lpm_kernel.kernel.l1.l1_manager import get_latest_global_bio

logger = logging.getLogger(__name__)


class L0KnowledgeRetriever:
    """L0 knowledge retriever.

    Searches the embedding service for document chunks similar to a query,
    keeps only those at or above the similarity threshold, and joins the
    surviving chunk contents into one string.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_chunks: int = 3,
    ):
        """
        Initialize the L0 knowledge retriever.

        Args:
            embedding_service: Embedding service instance used for similarity search.
            similarity_threshold: Only chunks whose similarity is greater than
                or equal to this value are included in the result.
            max_chunks: Maximum number of chunks requested from the search.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_chunks = max_chunks

    def retrieve(self, query: str) -> str:
        """
        Retrieve L0 knowledge for a query.

        Args:
            query: Query text.

        Returns:
            str: Relevant chunk contents joined by blank lines, or an empty
                string if nothing relevant was found or retrieval failed.
        """
        try:
            # search related chunks (each hit is a (chunk, similarity) pair)
            similar_chunks: List[
                Tuple[ChunkDTO, float]
            ] = self.embedding_service.search_similar_chunks(
                query=query, limit=self.max_chunks
            )

            if not similar_chunks:
                return ""

            # keep only chunks that clear the similarity threshold
            knowledge_parts = [
                chunk.content
                for chunk, similarity in similar_chunks
                if similarity >= self.similarity_threshold
            ]

            # joining an empty list yields "", the agreed "no knowledge" value
            return "\n\n".join(knowledge_parts)

        except Exception:
            # best-effort retrieval: record the traceback and degrade to ""
            logger.exception("L0 knowledge retrieval failed")
            return ""


class L1KnowledgeRetriever:
    """L1 knowledge retriever.

    Matches a query against the shades of the latest global bio by embedding
    similarity and formats the best matches as structured text.
    """

    def __init__(
        self,
        embedding_service: EmbeddingService,
        similarity_threshold: float = 0.7,
        max_shades: int = 3,
    ):
        """
        Initialize the L1 knowledge retriever.

        Args:
            embedding_service: Embedding service instance used to embed text
                and compute similarities.
            similarity_threshold: Only shades whose similarity is greater than
                or equal to this value are included in the result.
            max_shades: Maximum number of shades included in the output.
        """
        self.embedding_service = embedding_service
        self.similarity_threshold = similarity_threshold
        self.max_shades = max_shades

    def retrieve(self, query: str) -> str:
        """
        Retrieve L1 shades relevant to a query.

        Args:
            query: Query text.

        Returns:
            str: One "Shade / Description / Similarity" section per match,
                joined by blank lines, or an empty string if nothing relevant
                was found or retrieval failed.
        """
        try:
            # shades come from the latest global bio snapshot
            global_bio = get_latest_global_bio()
            if not global_bio or not global_bio.shades:
                logger.info("Global Bio not found or Shades is empty")
                return ""

            # embed the query once; all shade comparisons reuse it
            query_embedding = self.embedding_service.get_embedding(query)
            if not query_embedding:
                logger.error("Failed to get embedding for query text")
                return ""

            # embed every shade as "title - description", skipping failures
            # NOTE(review): shade embeddings are recomputed on every call;
            # consider caching them if retrieve() becomes hot.
            shade_embeddings = []
            for shade in global_bio.shades:
                shade_text = (
                    f"{shade.get('title', '')} - {shade.get('description', '')}"
                )
                embedding = self.embedding_service.get_embedding(shade_text)
                if embedding:
                    shade_embeddings.append((shade, embedding))

            if not shade_embeddings:
                logger.info("No available Shades embeddings found")
                return ""

            # score each shade and drop those below the threshold
            similar_shades = []
            for shade, embedding in shade_embeddings:
                similarity = self.embedding_service.calculate_similarity(
                    query_embedding, embedding
                )
                if similarity >= self.similarity_threshold:
                    similar_shades.append((shade, similarity))

            # highest similarity first, capped at max_shades
            similar_shades.sort(key=lambda item: item[1], reverse=True)
            similar_shades = similar_shades[: self.max_shades]

            if not similar_shades:
                return ""

            # structured output, one section per shade
            shade_parts = [
                f"Shade: {shade.get('title', '')}\n"
                f"Description: {shade.get('description', '')}\n"
                f"Similarity: {similarity:.2f}"
                for shade, similarity in similar_shades
            ]
            return "\n\n".join(shade_parts)

        except Exception:
            # best-effort retrieval: record the traceback and degrade to ""
            logger.exception("L1 knowledge retrieval failed")
            return ""


# create overall knowledge retriever instance
# NOTE: these module-level singletons construct an EmbeddingService at import
# time, so importing this module has side effects (service initialization).
default_retriever = L0KnowledgeRetriever(
    embedding_service=EmbeddingService(), similarity_threshold=0.7, max_chunks=3
)

# default L1 retriever; shares the same threshold/limit defaults as the
# L0 instance above but operates on global-bio shades instead of chunks
default_l1_retriever = L1KnowledgeRetriever(
    embedding_service=EmbeddingService(), similarity_threshold=0.7, max_shades=3
)