"""NL-to-SQL query tool — translates natural language to a validated SELECT, executes via asyncpg.""" from __future__ import annotations import logging import re from agent.agents._gemini import call_gemini_json from agent.config import settings from agent.models import RetrievedChunk logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Schema context fed to the LLM for SQL generation # --------------------------------------------------------------------------- _SCHEMA_CONTEXT = """ documents: doc_id (text, unique), title (text), source_url (text), source_type (text: 'confluence'|'github'|'jira'|'file'|'url'), team_id (text), metadata (jsonb), created_at (timestamptz), updated_at (timestamptz) chunks: chunk_id (text, unique), doc_id (text FK→documents.doc_id), text (text), source (text), source_type (text), team_id (text), chunk_index (integer), created_at (timestamptz) ingest_jobs: job_id (text), status (text: 'pending'|'running'|'completed'|'failed'), source_type (text), team_id (text), chunks_ingested (integer), error (text nullable), created_at (timestamptz), completed_at (timestamptz nullable) """ SQL_NL_TO_SQL_PROMPT = """\ You are a SQL generation agent for an Enterprise Knowledge Copilot backed by PostgreSQL (Supabase). Translate the user's natural language question into a safe SELECT query. Available tables and columns: {schema} Rules: 1. Generate ONLY a SELECT statement — never INSERT, UPDATE, DELETE, DROP, ALTER, TRUNCATE, or CREATE. 2. ALWAYS include `team_id = ''` in every table's WHERE clause. 3. Use ONLY the tables listed above. 4. Always end with `LIMIT {max_rows}`. 5. Use COUNT(*), SUM, AVG, MAX, MIN for aggregations when the question asks for totals or stats. 6. For recency, use `created_at >= NOW() - INTERVAL '7 days'` style syntax. 7. Cast uuid columns with `::text` when displaying them. Return ONLY valid JSON — no preamble, no markdown fences: {{ "sql": "