Spaces:
Sleeping
Sleeping
File size: 3,222 Bytes
bb013a1 397e8b0 bb013a1 397e8b0 bb013a1 397e8b0 bb013a1 397e8b0 bb013a1 397e8b0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | """
src/db.py
=========
Conexion a la base de datos SQLite de Northwind.
El esquema se define de forma fija y exacta para evitar que el LLM
invente columnas que no existen.
"""
from sqlalchemy import create_engine, text
import pandas as pd
_engine = None
def get_engine(db_path: str = "northwind.db"):
global _engine
if _engine is None:
_engine = create_engine(f"sqlite:///{db_path}", echo=False)
return _engine
# Esquema exacto verificado contra la BD real.
# Se define hardcodeado para garantizar precision total al LLM.
_SCHEMA = """
Database: Northwind Traders (SQLite)
Tables:
categories (
category_id INTEGER PK,
category_name VARCHAR(25), -- e.g. Beverages, Condiments, Seafood, Dairy Products
description VARCHAR(255)
)
suppliers (
supplier_id INTEGER PK,
supplier_name VARCHAR(50),
contact_name VARCHAR(50),
address VARCHAR(50),
city VARCHAR(20),
postal_code VARCHAR(10),
country VARCHAR(15),
phone VARCHAR(15)
)
shippers (
shipper_id INTEGER PK,
shipper_name VARCHAR(25), -- Speedy Express, United Package, Federal Shipping
phone VARCHAR(15)
)
customers (
customer_id INTEGER PK,
customer_name VARCHAR(50), -- use this column to filter by customer name
contact_name VARCHAR(50),
address VARCHAR(50),
city VARCHAR(20),
postal_code VARCHAR(10),
country VARCHAR(15)
)
employees (
employee_id INTEGER PK,
last_name VARCHAR(15),
first_name VARCHAR(15),
birth_date DATETIME,
photo VARCHAR(25),
notes VARCHAR(1024)
)
products (
product_id INTEGER PK,
product_name VARCHAR(50),
supplier_id INTEGER FK -> suppliers.supplier_id,
category_id INTEGER FK -> categories.category_id,
unit VARCHAR(25),
price NUMERIC -- unit price of the product
)
orders (
order_id INTEGER PK,
customer_id INTEGER FK -> customers.customer_id,
employee_id INTEGER FK -> employees.employee_id,
order_date DATETIME,
shipper_id INTEGER FK -> shippers.shipper_id
)
order_details (
order_detail_id INTEGER PK,
order_id INTEGER FK -> orders.order_id,
product_id INTEGER FK -> products.product_id,
quantity INTEGER
)
CRITICAL RULES — MUST FOLLOW:
- Revenue = order_details.quantity * products.price (NO unit_price column exists anywhere)
- Employee full name: first_name || ' ' || last_name
- Customer search: use customer_name column (NOT company_name, that column does not exist)
- Date formatting: strftime('%Y-%m', order_date)
- String concatenation: use || operator (NOT CONCAT)
- All table and column names are lowercase snake_case
- There is NO discount column in order_details
"""
def get_schema(db_path: str = "northwind.db") -> str:
"""Retorna el esquema exacto de la base de datos Northwind."""
return _SCHEMA
def run_query(sql: str, db_path: str = "northwind.db") -> pd.DataFrame:
"""Ejecuta una query y retorna un DataFrame. Lanza excepcion si falla."""
engine = get_engine(db_path)
with engine.connect() as conn:
return pd.read_sql_query(text(sql), conn) |