File size: 3,222 Bytes
bb013a1
 
 
397e8b0
 
 
bb013a1
 
397e8b0
bb013a1
 
 
 
 
 
 
 
 
 
 
 
397e8b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb013a1
397e8b0
 
bb013a1
 
 
 
 
 
397e8b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
src/db.py
=========
Conexion a la base de datos SQLite de Northwind.
El esquema se define de forma fija y exacta para evitar que el LLM
invente columnas que no existen.
"""

from sqlalchemy import create_engine, text
import pandas as pd

_engine = None


def get_engine(db_path: str = "northwind.db"):
    global _engine
    if _engine is None:
        _engine = create_engine(f"sqlite:///{db_path}", echo=False)
    return _engine


# Esquema exacto verificado contra la BD real.
# Se define hardcodeado para garantizar precision total al LLM.
_SCHEMA = """
Database: Northwind Traders (SQLite)

Tables:

categories (
    category_id   INTEGER PK,
    category_name VARCHAR(25),   -- e.g. Beverages, Condiments, Seafood, Dairy Products
    description   VARCHAR(255)
)

suppliers (
    supplier_id   INTEGER PK,
    supplier_name VARCHAR(50),
    contact_name  VARCHAR(50),
    address       VARCHAR(50),
    city          VARCHAR(20),
    postal_code   VARCHAR(10),
    country       VARCHAR(15),
    phone         VARCHAR(15)
)

shippers (
    shipper_id   INTEGER PK,
    shipper_name VARCHAR(25),    -- Speedy Express, United Package, Federal Shipping
    phone        VARCHAR(15)
)

customers (
    customer_id   INTEGER PK,
    customer_name VARCHAR(50),   -- use this column to filter by customer name
    contact_name  VARCHAR(50),
    address       VARCHAR(50),
    city          VARCHAR(20),
    postal_code   VARCHAR(10),
    country       VARCHAR(15)
)

employees (
    employee_id INTEGER PK,
    last_name   VARCHAR(15),
    first_name  VARCHAR(15),
    birth_date  DATETIME,
    photo       VARCHAR(25),
    notes       VARCHAR(1024)
)

products (
    product_id   INTEGER PK,
    product_name VARCHAR(50),
    supplier_id  INTEGER FK -> suppliers.supplier_id,
    category_id  INTEGER FK -> categories.category_id,
    unit         VARCHAR(25),
    price        NUMERIC       -- unit price of the product
)

orders (
    order_id    INTEGER PK,
    customer_id INTEGER FK -> customers.customer_id,
    employee_id INTEGER FK -> employees.employee_id,
    order_date  DATETIME,
    shipper_id  INTEGER FK -> shippers.shipper_id
)

order_details (
    order_detail_id INTEGER PK,
    order_id        INTEGER FK -> orders.order_id,
    product_id      INTEGER FK -> products.product_id,
    quantity        INTEGER
)

CRITICAL RULES — MUST FOLLOW:
- Revenue = order_details.quantity * products.price  (NO unit_price column exists anywhere)
- Employee full name: first_name || ' ' || last_name
- Customer search: use customer_name column (NOT company_name, that column does not exist)
- Date formatting: strftime('%Y-%m', order_date)
- String concatenation: use || operator (NOT CONCAT)
- All table and column names are lowercase snake_case
- There is NO discount column in order_details
"""


def get_schema(db_path: str = "northwind.db") -> str:
    """Retorna el esquema exacto de la base de datos Northwind."""
    return _SCHEMA


def run_query(sql: str, db_path: str = "northwind.db") -> pd.DataFrame:
    """Ejecuta una query y retorna un DataFrame. Lanza excepcion si falla."""
    engine = get_engine(db_path)
    with engine.connect() as conn:
        return pd.read_sql_query(text(sql), conn)