Spaces:
Sleeping
Sleeping
| """ | |
| src/db.py | |
| ========= | |
| Conexion a la base de datos SQLite de Northwind. | |
| El esquema se define de forma fija y exacta para evitar que el LLM | |
| invente columnas que no existen. | |
| """ | |
| from sqlalchemy import create_engine, text | |
| import pandas as pd | |
| _engine = None | |
| def get_engine(db_path: str = "northwind.db"): | |
| global _engine | |
| if _engine is None: | |
| _engine = create_engine(f"sqlite:///{db_path}", echo=False) | |
| return _engine | |
| # Esquema exacto verificado contra la BD real. | |
| # Se define hardcodeado para garantizar precision total al LLM. | |
| _SCHEMA = """ | |
| Database: Northwind Traders (SQLite) | |
| Tables: | |
| categories ( | |
| category_id INTEGER PK, | |
| category_name VARCHAR(25), -- e.g. Beverages, Condiments, Seafood, Dairy Products | |
| description VARCHAR(255) | |
| ) | |
| suppliers ( | |
| supplier_id INTEGER PK, | |
| supplier_name VARCHAR(50), | |
| contact_name VARCHAR(50), | |
| address VARCHAR(50), | |
| city VARCHAR(20), | |
| postal_code VARCHAR(10), | |
| country VARCHAR(15), | |
| phone VARCHAR(15) | |
| ) | |
| shippers ( | |
| shipper_id INTEGER PK, | |
| shipper_name VARCHAR(25), -- Speedy Express, United Package, Federal Shipping | |
| phone VARCHAR(15) | |
| ) | |
| customers ( | |
| customer_id INTEGER PK, | |
| customer_name VARCHAR(50), -- use this column to filter by customer name | |
| contact_name VARCHAR(50), | |
| address VARCHAR(50), | |
| city VARCHAR(20), | |
| postal_code VARCHAR(10), | |
| country VARCHAR(15) | |
| ) | |
| employees ( | |
| employee_id INTEGER PK, | |
| last_name VARCHAR(15), | |
| first_name VARCHAR(15), | |
| birth_date DATETIME, | |
| photo VARCHAR(25), | |
| notes VARCHAR(1024) | |
| ) | |
| products ( | |
| product_id INTEGER PK, | |
| product_name VARCHAR(50), | |
| supplier_id INTEGER FK -> suppliers.supplier_id, | |
| category_id INTEGER FK -> categories.category_id, | |
| unit VARCHAR(25), | |
| price NUMERIC -- unit price of the product | |
| ) | |
| orders ( | |
| order_id INTEGER PK, | |
| customer_id INTEGER FK -> customers.customer_id, | |
| employee_id INTEGER FK -> employees.employee_id, | |
| order_date DATETIME, | |
| shipper_id INTEGER FK -> shippers.shipper_id | |
| ) | |
| order_details ( | |
| order_detail_id INTEGER PK, | |
| order_id INTEGER FK -> orders.order_id, | |
| product_id INTEGER FK -> products.product_id, | |
| quantity INTEGER | |
| ) | |
| CRITICAL RULES — MUST FOLLOW: | |
| - Revenue = order_details.quantity * products.price (NO unit_price column exists anywhere) | |
| - Employee full name: first_name || ' ' || last_name | |
| - Customer search: use customer_name column (NOT company_name, that column does not exist) | |
| - Date formatting: strftime('%Y-%m', order_date) | |
| - String concatenation: use || operator (NOT CONCAT) | |
| - All table and column names are lowercase snake_case | |
| - There is NO discount column in order_details | |
| """ | |
| def get_schema(db_path: str = "northwind.db") -> str: | |
| """Retorna el esquema exacto de la base de datos Northwind.""" | |
| return _SCHEMA | |
| def run_query(sql: str, db_path: str = "northwind.db") -> pd.DataFrame: | |
| """Ejecuta una query y retorna un DataFrame. Lanza excepcion si falla.""" | |
| engine = get_engine(db_path) | |
| with engine.connect() as conn: | |
| return pd.read_sql_query(text(sql), conn) |