Spaces:
Running on Zero
Running on Zero
"""
Schema retrieval engine for the SQL Agent.

For typical user uploads (1–3 tables, a few dozen columns) we can pass the
full schema to the model. ChromaDB-based semantic retrieval is overkill at
this scale; we keep the API simple and let SchemaExtractor produce
CREATE-TABLE-style context.

If the schema ever exceeds the model's context budget, this is the place
to add embedding-based filtering.
"""
| import logging | |
| import duckdb | |
| from src.rag.schema_extractor import SchemaExtractor | |
| logger = logging.getLogger(__name__) | |
class RAGEngine:
    """Light wrapper that returns the full schema as model context.

    The engine keeps a DuckDB connection together with a ``SchemaExtractor``
    built from it. No filtering or ranking happens here: ``retrieve`` hands
    back whatever ``SchemaExtractor.get_schema_text()`` produces.

    Attributes are ``None`` while the engine is unbound (constructed without
    a connection, or after ``clear``).
    """

    # Annotations below are string literals, so they are not evaluated when
    # the class body executes.
    def __init__(self, con: "duckdb.DuckDBPyConnection | None" = None) -> None:
        """Create an engine, optionally bound to ``con`` immediately.

        Args:
            con: Connection used to build the extractor. When ``None`` the
                engine starts unbound and ``bind`` must be called before
                ``retrieve`` can return schema text.
        """
        self.con = con
        self.extractor: "SchemaExtractor | None" = (
            SchemaExtractor(con) if con is not None else None
        )

    def bind(self, con: "duckdb.DuckDBPyConnection") -> None:
        """Attach ``con`` and build a fresh extractor for it.

        Any previously held connection and extractor are replaced.
        """
        self.con = con
        self.extractor = SchemaExtractor(con)

    def retrieve(self, question: str, top_k: int = 5) -> str:
        """Return the schema text to use as model context.

        Args:
            question: Accepted but not used; the full schema is returned
                regardless of the question.
            top_k: Accepted but not used; no ranking is performed.

        Returns:
            The result of ``get_schema_text()`` on the bound extractor, or
            an empty string (with a warning logged) when the engine is
            unbound.
        """
        # Compare against None explicitly: "unbound" means the attribute is
        # None, not that the extractor object happens to evaluate as falsy
        # (which it would if its class defined __len__ or __bool__).
        if self.extractor is None:
            logger.warning("RAG engine called before bind()")
            return ""
        return self.extractor.get_schema_text()

    def clear(self) -> None:
        """Drop the extractor and the connection, returning to unbound state.

        The connection is only dereferenced here; it is not closed.
        """
        self.extractor = None
        self.con = None