File size: 1,283 Bytes
a067ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
Schema retrieval engine for the SQL Agent.

For typical user uploads (1-3 tables, a few dozen columns) we can pass the
full schema to the model. ChromaDB-based semantic retrieval is overkill at
this scale; we keep the API simple and let SchemaExtractor produce
CREATE-TABLE-style context.

If the schema ever exceeds the model's context budget, this is the place
to add embedding-based filtering.
"""

import logging

import duckdb

from src.rag.schema_extractor import SchemaExtractor

# Module-level logger; RAGEngine.retrieve uses it to warn when it is called
# before an extractor has been bound.
logger = logging.getLogger(__name__)


class RAGEngine:
    """Light wrapper that returns the full schema as model context.

    The engine keeps a DuckDB connection together with a ``SchemaExtractor``
    built on that connection. It may be created unbound (``con=None``) and
    attached later with :meth:`bind`; :meth:`clear` detaches it again.
    """

    def __init__(self, con: duckdb.DuckDBPyConnection | None = None) -> None:
        """Create the engine, optionally bound to *con* right away.

        Args:
            con: Connection to build the schema extractor on, or ``None``
                to start unbound.
        """
        self.con: duckdb.DuckDBPyConnection | None = con
        # Only build an extractor when there is a connection to hand to it.
        self.extractor: SchemaExtractor | None = (
            SchemaExtractor(con) if con is not None else None
        )

    def bind(self, con: duckdb.DuckDBPyConnection) -> None:
        """Attach *con*, replacing any previous connection and extractor.

        Args:
            con: Connection that a fresh ``SchemaExtractor`` is built on.
        """
        self.con = con
        self.extractor = SchemaExtractor(con)

    def retrieve(self, question: str, top_k: int = 5) -> str:
        """Return the schema text produced by the bound extractor.

        Args:
            question: Currently unused; no filtering by question is applied.
            top_k: Currently unused; the extractor's full output is returned.

        Returns:
            The result of ``self.extractor.get_schema_text()``, or an empty
            string (after logging a warning) when no extractor is bound.
        """
        # "Unbound" is represented by None. Compare against None explicitly
        # rather than relying on truthiness, so an extractor object that
        # happens to evaluate falsy (e.g. one defining __len__ or __bool__)
        # is still treated as bound.
        if self.extractor is None:
            logger.warning("RAG engine called before bind()")
            return ""
        return self.extractor.get_schema_text()

    def clear(self) -> None:
        """Detach the engine by dropping its extractor and connection."""
        self.extractor = None
        self.con = None