mozzic committed on
Commit
7e22d15
·
verified ·
1 Parent(s): 5a1d3ae

Upload src\retrieval.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src//retrieval.py +60 -0
src//retrieval.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retrieval system for context-aware question answering
3
+ """
4
+
5
+ from typing import List, Dict, Optional
6
+ from src.models import ContextUnit, RetrievalResult, QueryRequest
7
+
8
+
9
class ContextBuilder:
    """Builds context windows for LLM queries."""

    # Placeholder intent value that is treated as "no intent" and omitted.
    PENDING_INTENT = "[Pending intent inference]"
    # Maximum number of characters of a cell's source included per unit.
    MAX_SOURCE_CHARS = 500
    # Rough characters-per-token ratio used to convert the token budget
    # into a character budget.
    CHARS_PER_TOKEN = 4
    # Marker appended when the context has to be cut short.
    TRUNCATION_MARKER = "..."

    @staticmethod
    def build_context_for_llm(units: List["ContextUnit"], query: str, max_tokens: int = 3000) -> str:
        """Build the context string handed to the LLM.

        Each unit is rendered as a small text section containing the cell id
        and type, the intent (unless empty or still the pending placeholder),
        up to ``MAX_SOURCE_CHARS`` characters of the cell source, and the
        dependency list (when non-empty). Sections are separated by blank
        lines.

        Args:
            units: Context units to render, in the order they should appear.
            query: The user query. Currently unused by this method; kept as
                part of the interface.
            max_tokens: Approximate token budget. It is converted to a
                character budget with ``CHARS_PER_TOKEN`` characters per
                token. Non-positive values yield an empty string.

        Returns:
            The rendered context. When the rendered text exceeds the
            character budget it is cut and ends with ``TRUNCATION_MARKER``;
            the returned string never exceeds the budget.
        """
        sections = []

        for unit in units:
            cell = unit.cell
            lines = [f"Cell {cell.cell_id} ({cell.cell_type}):"]
            if unit.intent and unit.intent != ContextBuilder.PENDING_INTENT:
                lines.append(f"Intent: {unit.intent}")
            lines.append(f"Content: {cell.source[:ContextBuilder.MAX_SOURCE_CHARS]}")
            if unit.dependencies:
                # str() keeps the join from failing on non-string ids.
                deps = ", ".join(str(dep) for dep in unit.dependencies)
                lines.append(f"Dependencies: {deps}")
            # Every line is newline-terminated and the section ends with an
            # extra blank line.
            sections.append("\n".join(lines) + "\n\n")

        context = "\n".join(sections)

        # Clamp so a negative budget cannot turn into a negative slice bound
        # (which would keep almost the whole text instead of none of it).
        char_budget = max(max_tokens, 0) * ContextBuilder.CHARS_PER_TOKEN
        if len(context) > char_budget:
            marker = ContextBuilder.TRUNCATION_MARKER
            # Reserve room for the marker so the result stays within budget.
            keep = max(char_budget - len(marker), 0)
            context = (context[:keep] + marker)[:char_budget]

        return context
35
+
36
+
37
class RetrievalEngine:
    """Main retrieval engine.

    Wraps an indexer that performs the semantic search and keeps a reference
    to the context thread passed in at construction time.
    """

    def __init__(self, context_thread, indexer):
        """Store the collaborators used for retrieval.

        Args:
            context_thread: Stored on the instance; not used by ``retrieve``.
            indexer: Object providing ``search_units(query, k=...)``.
        """
        self.context_thread = context_thread
        self.indexer = indexer

    def retrieve(self, query: str, top_k: int = 5) -> RetrievalResult:
        """Retrieve relevant context units.

        Args:
            query: Text to search for.
            top_k: Forwarded to the indexer as ``k``.

        Returns:
            A ``RetrievalResult`` holding the units and scores from the
            search results, in the order returned, and the original query.
        """
        # Semantic search only for now; a full implementation would combine
        # these hits with structural retrieval.
        hits = self.indexer.search_units(query, k=top_k)

        # Each hit is a (unit, score) pair; split into two parallel lists.
        matched_units = [hit_unit for hit_unit, _ in hits]
        matched_scores = [hit_score for _, hit_score in hits]

        return RetrievalResult(units=matched_units, scores=matched_scores, query=query)