File size: 2,047 Bytes
7e22d15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""

Retrieval system for context-aware question answering

"""

from typing import List, Dict, Optional
from src.models import ContextUnit, RetrievalResult, QueryRequest


class ContextBuilder:
    """Builds context windows for LLM queries."""

    # Intent value that marks a unit whose intent has not been inferred yet;
    # such intents are left out of the rendered context.
    _PENDING_INTENT = "[Pending intent inference]"
    # Maximum number of characters of each cell's source that is rendered.
    _MAX_SOURCE_CHARS = 500
    # Rough characters-per-token estimate used to turn the token budget into
    # a character budget.
    _CHARS_PER_TOKEN = 4

    @staticmethod
    def build_context_for_llm(units: List[ContextUnit], query: str, max_tokens: int = 3000) -> str:
        """Render context units into a single text block for an LLM prompt.

        Each unit becomes a section with a header line (cell id and cell
        type), an optional ``Intent:`` line, a ``Content:`` line holding at
        most the first 500 characters of the cell source, and an optional
        ``Dependencies:`` line. Sections are joined with newlines.

        Args:
            units: Units to render, in the order they should appear. Each
                unit must expose ``cell`` (with ``cell_id``, ``cell_type``
                and a sliceable ``source``), ``intent`` and ``dependencies``.
            query: The user query. Accepted for interface compatibility; it
                is currently not used when building the context.
            max_tokens: Approximate token budget. The budget is enforced in
                characters, assuming roughly 4 characters per token. Values
                below zero are treated as zero.

        Returns:
            The rendered context. When it exceeds the character budget it is
            cut at the budget and ``"..."`` is appended (so the result may be
            up to three characters longer than the budget). An empty
            ``units`` sequence yields an empty string.
        """
        # Clamp so a negative budget cannot become a negative slice bound,
        # which would strip characters from the end instead of truncating.
        char_budget = max(0, max_tokens) * ContextBuilder._CHARS_PER_TOKEN

        context_parts = []
        joined_len = 0  # length "\n".join(context_parts) would have so far

        for unit in units:
            cell = unit.cell
            lines = [f"Cell {cell.cell_id} ({cell.cell_type}):"]
            if unit.intent and unit.intent != ContextBuilder._PENDING_INTENT:
                lines.append(f"Intent: {unit.intent}")
            lines.append(f"Content: {cell.source[:ContextBuilder._MAX_SOURCE_CHARS]}")
            if unit.dependencies:
                # str() keeps non-string dependency ids (e.g. ints) from
                # breaking str.join.
                deps = ", ".join(str(dep) for dep in unit.dependencies)
                lines.append(f"Dependencies: {deps}")
            # Every line ends with a newline and a blank line closes the section.
            part = "\n".join(lines) + "\n\n"

            # Account for the separator join() inserts between sections.
            joined_len += len(part) + (1 if context_parts else 0)
            context_parts.append(part)

            # Anything past the budget is cut off below, so formatting
            # further units cannot change the result.
            if joined_len > char_budget:
                break

        context = "\n".join(context_parts)

        # Truncate if too long
        if len(context) > char_budget:
            context = context[:char_budget] + "..."

        return context


class RetrievalEngine:
    """Looks up the context units most relevant to a query."""

    def __init__(self, context_thread, indexer):
        """Store the collaborators used for retrieval.

        Args:
            context_thread: Kept on the instance as ``context_thread``; it is
                not consulted by ``retrieve``.
            indexer: Object providing ``search_units(query, k=...)``.
        """
        self.indexer = indexer
        self.context_thread = context_thread

    def retrieve(self, query: str, top_k: int = 5) -> RetrievalResult:
        """Run a semantic search for ``query`` and package the hits.

        Args:
            query: Text to search for.
            top_k: Passed to the indexer as ``k``.

        Returns:
            A ``RetrievalResult`` carrying the matched units, their scores
            (in the same order) and the original query.
        """
        # The indexer yields (unit, score) pairs.
        hits = self.indexer.search_units(query, k=top_k)

        # Only the semantic hits are returned for now; combining them with
        # structural retrieval is left for the full implementation.
        return RetrievalResult(
            units=[found for found, _ in hits],
            scores=[relevance for _, relevance in hits],
            query=query,
        )