jhonacmarvik's picture
fix router, schema, prompts and examples
397e8b0
"""
src/db.py
=========
Conexion a la base de datos SQLite de Northwind.
El esquema se define de forma fija y exacta para evitar que el LLM
invente columnas que no existen.
"""
from sqlalchemy import create_engine, text
import pandas as pd
_engine = None
def get_engine(db_path: str = "northwind.db"):
global _engine
if _engine is None:
_engine = create_engine(f"sqlite:///{db_path}", echo=False)
return _engine
# Esquema exacto verificado contra la BD real.
# Se define hardcodeado para garantizar precision total al LLM.
_SCHEMA = """
Database: Northwind Traders (SQLite)
Tables:
categories (
category_id INTEGER PK,
category_name VARCHAR(25), -- e.g. Beverages, Condiments, Seafood, Dairy Products
description VARCHAR(255)
)
suppliers (
supplier_id INTEGER PK,
supplier_name VARCHAR(50),
contact_name VARCHAR(50),
address VARCHAR(50),
city VARCHAR(20),
postal_code VARCHAR(10),
country VARCHAR(15),
phone VARCHAR(15)
)
shippers (
shipper_id INTEGER PK,
shipper_name VARCHAR(25), -- Speedy Express, United Package, Federal Shipping
phone VARCHAR(15)
)
customers (
customer_id INTEGER PK,
customer_name VARCHAR(50), -- use this column to filter by customer name
contact_name VARCHAR(50),
address VARCHAR(50),
city VARCHAR(20),
postal_code VARCHAR(10),
country VARCHAR(15)
)
employees (
employee_id INTEGER PK,
last_name VARCHAR(15),
first_name VARCHAR(15),
birth_date DATETIME,
photo VARCHAR(25),
notes VARCHAR(1024)
)
products (
product_id INTEGER PK,
product_name VARCHAR(50),
supplier_id INTEGER FK -> suppliers.supplier_id,
category_id INTEGER FK -> categories.category_id,
unit VARCHAR(25),
price NUMERIC -- unit price of the product
)
orders (
order_id INTEGER PK,
customer_id INTEGER FK -> customers.customer_id,
employee_id INTEGER FK -> employees.employee_id,
order_date DATETIME,
shipper_id INTEGER FK -> shippers.shipper_id
)
order_details (
order_detail_id INTEGER PK,
order_id INTEGER FK -> orders.order_id,
product_id INTEGER FK -> products.product_id,
quantity INTEGER
)
CRITICAL RULES — MUST FOLLOW:
- Revenue = order_details.quantity * products.price (NO unit_price column exists anywhere)
- Employee full name: first_name || ' ' || last_name
- Customer search: use customer_name column (NOT company_name, that column does not exist)
- Date formatting: strftime('%Y-%m', order_date)
- String concatenation: use || operator (NOT CONCAT)
- All table and column names are lowercase snake_case
- There is NO discount column in order_details
"""
def get_schema(db_path: str = "northwind.db") -> str:
"""Retorna el esquema exacto de la base de datos Northwind."""
return _SCHEMA
def run_query(sql: str, db_path: str = "northwind.db") -> pd.DataFrame:
"""Ejecuta una query y retorna un DataFrame. Lanza excepcion si falla."""
engine = get_engine(db_path)
with engine.connect() as conn:
return pd.read_sql_query(text(sql), conn)