# app/tools/sql_tool.py import os, sqlite3, random, math, string from datetime import date, timedelta from typing import Optional, Dict, Any, List, Tuple REGIONS: List[Tuple[str, str]] = [ ("NCR", "Delhi NCR"), ("BLR", "Bengaluru"), ("MUM", "Mumbai"), ("HYD", "Hyderabad"), ("CHN", "Chennai"), ("PUN", "Pune"), ] PRODUCTS: List[Tuple[str, str, str, float]] = [ ("ELEC-001", "Electronics", "Smartphone Alpha", 29999.0), ("ELEC-002", "Electronics", "Smartphone Pro", 49999.0), ("ELEC-003", "Electronics", "Laptop 14\"", 65999.0), ("ELEC-004", "Electronics", "Earbuds", 3999.0), ("APP-001", "Apparel", "Athleisure Tee", 999.0), ("APP-002", "Apparel", "Formal Shirt", 1599.0), ("APP-003", "Apparel", "Denim Jeans", 2499.0), ("GROC-001", "Grocery", "Olive Oil 1L", 799.0), ("GROC-002", "Grocery", "Basmati 5kg", 899.0), ("GROC-003", "Grocery", "Almonds 1kg", 1199.0), ("HOME-001", "Home", "Mixer Grinder", 3499.0), ("HOME-002", "Home", "Air Fryer", 6999.0), ] def _rand_name(rnd: random.Random): first = ["Ramesh","Suresh","Mahesh","Amit","Priya","Anita","Kiran","Sunil","Neha","Pooja","Ravi","Vijay","Anil","Meera","Tarun"] last = ["Kumar","Sharma","Patel","Verma","Reddy","Iyer","Das","Ghosh","Yadav","Gupta","Singh","Menon"] return f"{rnd.choice(first)} {rnd.choice(last)}" class SQLTool: """ Enterprise-ish SQLite schema with dims/facts and safe read-only SQL execution. dim_region(code, name) dim_product(sku, category, name, price) dim_employee(emp_id, name, region_code, role, hire_date) fact_sales(day, region_code, sku, channel, units, revenue) -- daily aggregates fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue) -- retail split by employee inv_stock(day, region_code, sku, on_hand_qty) """ def __init__(self, db_path: Optional[str] = None): path = db_path or os.getenv("SQLITE_PATH", ":memory:") self.path = path self.conn = sqlite3.connect(path, check_same_thread=False) self.conn.execute("PRAGMA journal_mode=WAL;") self.conn.execute("PRAGMA synchronous=NORMAL;") # ---------------------- schema & seed ---------------------- def setup_demo_enterprise(self, start: str = "2025-08-10", end: str = "2025-09-10", seed: int = 42): cur = self.conn.cursor() cur.executescript( """ CREATE TABLE IF NOT EXISTS dim_region ( code TEXT PRIMARY KEY, name TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS dim_product ( sku TEXT PRIMARY KEY, category TEXT NOT NULL, name TEXT NOT NULL, price REAL NOT NULL ); CREATE TABLE IF NOT EXISTS dim_employee ( emp_id TEXT PRIMARY KEY, name TEXT NOT NULL, region_code TEXT NOT NULL REFERENCES dim_region(code), role TEXT NOT NULL, hire_date TEXT NOT NULL ); CREATE TABLE IF NOT EXISTS fact_sales ( day TEXT NOT NULL, region_code TEXT NOT NULL REFERENCES dim_region(code), sku TEXT NOT NULL REFERENCES dim_product(sku), channel TEXT NOT NULL, units INTEGER NOT NULL, revenue REAL NOT NULL, PRIMARY KEY (day, region_code, sku, channel) ); CREATE TABLE IF NOT EXISTS fact_sales_detail ( day TEXT NOT NULL, region_code TEXT NOT NULL REFERENCES dim_region(code), sku TEXT NOT NULL REFERENCES dim_product(sku), channel TEXT NOT NULL, employee_id TEXT NULL REFERENCES dim_employee(emp_id), units INTEGER NOT NULL, revenue REAL NOT NULL ); CREATE TABLE IF NOT EXISTS inv_stock ( day TEXT NOT NULL, region_code TEXT NOT NULL REFERENCES dim_region(code), sku TEXT NOT NULL REFERENCES dim_product(sku), on_hand_qty INTEGER NOT NULL, PRIMARY KEY (day, region_code, sku) ); CREATE INDEX IF NOT EXISTS idx_sales_day ON fact_sales(day); CREATE INDEX IF NOT EXISTS idx_sales_region ON fact_sales(region_code); CREATE INDEX IF NOT EXISTS idx_sales_sku ON fact_sales(sku); CREATE INDEX IF NOT EXISTS idx_sales_day_region ON fact_sales(day, region_code); CREATE INDEX IF NOT EXISTS idx_detail_day_region ON fact_sales_detail(day, region_code); CREATE INDEX IF NOT EXISTS idx_detail_emp ON fact_sales_detail(employee_id); CREATE INDEX IF NOT EXISTS idx_stock_day_region ON inv_stock(day, region_code); """ ) existing = set(r[0] for r in cur.execute("SELECT code FROM dim_region")) to_ins = [(c, n) for c, n in REGIONS if c not in existing] if to_ins: cur.executemany("INSERT INTO dim_region(code, name) VALUES (?,?)", to_ins) existing_sku = set(r[0] for r in cur.execute("SELECT sku FROM dim_product")) to_ins_p = [p for p in PRODUCTS if p[0] not in existing_sku] if to_ins_p: cur.executemany("INSERT INTO dim_product(sku, category, name, price) VALUES (?,?,?,?)", to_ins_p) emp_count = cur.execute("SELECT COUNT(*) FROM dim_employee").fetchone()[0] rnd = random.Random(seed) if emp_count == 0: rows = [] for code, _ in REGIONS: n = 8 for _ in range(n): emp_id = f"E{code}{rnd.randint(1000,9999)}" rows.append((emp_id, _rand_name(rnd), code, rnd.choice(["AE","SE","AM"]), "2023-01-01")) cur.executemany("INSERT INTO dim_employee(emp_id,name,region_code,role,hire_date) VALUES (?,?,?,?,?)", rows) n_sales = cur.execute("SELECT COUNT(*) FROM fact_sales").fetchone()[0] if n_sales == 0: self._seed_fact_sales(cur, start, end, seed) n_detail = cur.execute("SELECT COUNT(*) FROM fact_sales_detail").fetchone()[0] if n_detail == 0: self._seed_sales_detail(cur, seed) n_stock = cur.execute("SELECT COUNT(*) FROM inv_stock").fetchone()[0] if n_stock == 0: self._seed_stock(cur, start, end, seed) self.conn.commit() def _seed_fact_sales(self, cur: sqlite3.Cursor, start: str, end: str, seed: int): rnd = random.Random(seed) start_d = date.fromisoformat(start) end_d = date.fromisoformat(end) days = (end_d - start_d).days + 1 region_factor = {"NCR":1.25,"MUM":1.15,"BLR":1.10,"HYD":0.95,"CHN":0.90,"PUN":0.85} channels = ["Online","Retail"] batch = [] for i in range(days): d = (start_d + timedelta(days=i)).isoformat() wknd = (start_d + timedelta(days=i)).weekday() >= 5 wknd_boost = 1.10 if wknd else 1.0 for code, _name in REGIONS: rfac = region_factor[code] for sku, category, _nm, price in PRODUCTS: for ch in channels: base = {"Electronics":12,"Apparel":25,"Grocery":40,"Home":9}[category] ch_mult = 1.15 if ch == "Online" else 0.95 mu = base * rfac * wknd_boost * ch_mult sigma = max(1.0, mu * 0.25) units = max(0, int(rnd.gauss(mu, sigma))) season = 1.0 + 0.08 * math.sin(i / 3.5) revenue = round(units * price * season, 2) batch.append((d, code, sku, ch, units, revenue)) cur.executemany( "INSERT INTO fact_sales(day, region_code, sku, channel, units, revenue) VALUES (?,?,?,?,?,?)", batch ) def _seed_sales_detail(self, cur: sqlite3.Cursor, seed: int): rnd = random.Random(seed+7) rows = cur.execute( "SELECT day, region_code, sku, units, revenue FROM fact_sales WHERE channel='Retail'" ).fetchall() for d, region, sku, units, revenue in rows: if units == 0: continue emp_ids = [r[0] for r in cur.execute("SELECT emp_id FROM dim_employee WHERE region_code=?", (region,))] parts = rnd.randint(1, min(4, max(1, units))) cuts = sorted(rnd.sample(range(1, units), parts-1)) if units > parts else [] splits = [b-a for a,b in zip([0]+cuts, cuts+[units])] total_units = float(sum(splits)) rev_splits = [round(revenue * (u/total_units), 2) for u in splits] if rev_splits: drift = round(revenue - sum(rev_splits), 2) rev_splits[0] += drift for u, r in zip(splits, rev_splits): emp = rnd.choice(emp_ids) if emp_ids else None cur.execute( "INSERT INTO fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue) " "VALUES (?,?,?,?,?,?,?)", (d, region, sku, "Retail", emp, u, r) ) def _seed_stock(self, cur: sqlite3.Cursor, start: str, end: str, seed: int): rnd = random.Random(seed+13) start_d = date.fromisoformat(start) end_d = date.fromisoformat(end) days = (end_d - start_d).days + 1 for i in range(days): d = (start_d + timedelta(days=i)).isoformat() for code, _ in REGIONS: for sku, category, _nm, _price in PRODUCTS: base = {"Electronics":400,"Apparel":800,"Grocery":600,"Home":300}[category] noise = rnd.randint(-30, 30) on_hand = max(0, base + noise - i*2) cur.execute( "INSERT INTO inv_stock(day, region_code, sku, on_hand_qty) VALUES (?,?,?,?)", (d, code, sku, on_hand) ) # ---------------------- helpers + read-only executor ---------------------- def region_codes(self) -> List[str]: rows = self.conn.execute("SELECT code FROM dim_region").fetchall() return [r[0] for r in rows] def execute_sql_readonly(self, sql: str) -> Dict[str, Any]: """Hard safety: allow only a single SELECT statement; no comments/CTEs with semicolons.""" s = sql.strip() bad = (";", "--", "/*", "*/") if not s.lower().startswith("select"): raise ValueError("Only SELECT statements are allowed.") if any(tok in s for tok in bad): raise ValueError("Disallowed token in SQL.") cur = self.conn.cursor() cur.execute(s) cols = [d[0] for d in cur.description] if cur.description else [] rows = cur.fetchall() return {"columns": cols, "rows": rows, "rowcount": len(rows)} if __name__ == "__main__": import argparse import os import sys from pathlib import Path parser = argparse.ArgumentParser( description="Create and persist the demo enterprise SQLite database." ) parser.add_argument( "--db", default=os.getenv("SQLITE_PATH", "./demo_enterprise.sqlite"), help="Path to the SQLite DB file to create (defaults to ./demo_enterprise.sqlite or $SQLITE_PATH).", ) parser.add_argument( "--start", default="2025-08-10", help="Start date (YYYY-MM-DD) for seeding data.", ) parser.add_argument( "--end", default="2025-09-10", help="End date (YYYY-MM-DD) for seeding data.", ) parser.add_argument( "--seed", type=int, default=42, help="Random seed used for deterministic seeding.", ) args = parser.parse_args() # Ensure we persist to disk (avoid in-memory DB). db_path = args.db if db_path == ":memory:": print("':memory:' was requested; switching to ./demo_enterprise.sqlite so the DB is saved to disk.") db_path = "./demo_enterprise.db" # Make sure the parent directory exists parent = Path(db_path).expanduser().resolve().parent parent.mkdir(parents=True, exist_ok=True) tool = SQLTool(db_path=str(Path(db_path).expanduser())) tool.setup_demo_enterprise(start=args.start, end=args.end, seed=args.seed) # Flush and close tool.conn.commit() tool.conn.close() print(f"✅ Database created at: {Path(db_path).expanduser().resolve()}") print(f" Seed window: {args.start} → {args.end} | seed={args.seed}") # # Default location ./demo_enterprise.sqlite # python app/tools/sql_tool.py # # Custom location and date range # python BI_Assistant_Backend/app/tools/sql_tool.py --db ./data/retail_demo.sqlite --start 2025-08-01 --end 2025-09-10 --seed 123