Spaces:
Sleeping
Sleeping
| # app/tools/sql_tool.py | |
| import os, sqlite3, random, math, string | |
| from datetime import date, timedelta | |
| from typing import Optional, Dict, Any, List, Tuple | |
| REGIONS: List[Tuple[str, str]] = [ | |
| ("NCR", "Delhi NCR"), | |
| ("BLR", "Bengaluru"), | |
| ("MUM", "Mumbai"), | |
| ("HYD", "Hyderabad"), | |
| ("CHN", "Chennai"), | |
| ("PUN", "Pune"), | |
| ] | |
| PRODUCTS: List[Tuple[str, str, str, float]] = [ | |
| ("ELEC-001", "Electronics", "Smartphone Alpha", 29999.0), | |
| ("ELEC-002", "Electronics", "Smartphone Pro", 49999.0), | |
| ("ELEC-003", "Electronics", "Laptop 14\"", 65999.0), | |
| ("ELEC-004", "Electronics", "Earbuds", 3999.0), | |
| ("APP-001", "Apparel", "Athleisure Tee", 999.0), | |
| ("APP-002", "Apparel", "Formal Shirt", 1599.0), | |
| ("APP-003", "Apparel", "Denim Jeans", 2499.0), | |
| ("GROC-001", "Grocery", "Olive Oil 1L", 799.0), | |
| ("GROC-002", "Grocery", "Basmati 5kg", 899.0), | |
| ("GROC-003", "Grocery", "Almonds 1kg", 1199.0), | |
| ("HOME-001", "Home", "Mixer Grinder", 3499.0), | |
| ("HOME-002", "Home", "Air Fryer", 6999.0), | |
| ] | |
| def _rand_name(rnd: random.Random): | |
| first = ["Ramesh","Suresh","Mahesh","Amit","Priya","Anita","Kiran","Sunil","Neha","Pooja","Ravi","Vijay","Anil","Meera","Tarun"] | |
| last = ["Kumar","Sharma","Patel","Verma","Reddy","Iyer","Das","Ghosh","Yadav","Gupta","Singh","Menon"] | |
| return f"{rnd.choice(first)} {rnd.choice(last)}" | |
| class SQLTool: | |
| """ | |
| Enterprise-ish SQLite schema with dims/facts and safe read-only SQL execution. | |
| dim_region(code, name) | |
| dim_product(sku, category, name, price) | |
| dim_employee(emp_id, name, region_code, role, hire_date) | |
| fact_sales(day, region_code, sku, channel, units, revenue) -- daily aggregates | |
| fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue) -- retail split by employee | |
| inv_stock(day, region_code, sku, on_hand_qty) | |
| """ | |
| def __init__(self, db_path: Optional[str] = None): | |
| path = db_path or os.getenv("SQLITE_PATH", ":memory:") | |
| self.path = path | |
| self.conn = sqlite3.connect(path, check_same_thread=False) | |
| self.conn.execute("PRAGMA journal_mode=WAL;") | |
| self.conn.execute("PRAGMA synchronous=NORMAL;") | |
| # ---------------------- schema & seed ---------------------- | |
| def setup_demo_enterprise(self, start: str = "2025-08-10", end: str = "2025-09-10", seed: int = 42): | |
| cur = self.conn.cursor() | |
| cur.executescript( | |
| """ | |
| CREATE TABLE IF NOT EXISTS dim_region ( | |
| code TEXT PRIMARY KEY, | |
| name TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS dim_product ( | |
| sku TEXT PRIMARY KEY, | |
| category TEXT NOT NULL, | |
| name TEXT NOT NULL, | |
| price REAL NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS dim_employee ( | |
| emp_id TEXT PRIMARY KEY, | |
| name TEXT NOT NULL, | |
| region_code TEXT NOT NULL REFERENCES dim_region(code), | |
| role TEXT NOT NULL, | |
| hire_date TEXT NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS fact_sales ( | |
| day TEXT NOT NULL, | |
| region_code TEXT NOT NULL REFERENCES dim_region(code), | |
| sku TEXT NOT NULL REFERENCES dim_product(sku), | |
| channel TEXT NOT NULL, | |
| units INTEGER NOT NULL, | |
| revenue REAL NOT NULL, | |
| PRIMARY KEY (day, region_code, sku, channel) | |
| ); | |
| CREATE TABLE IF NOT EXISTS fact_sales_detail ( | |
| day TEXT NOT NULL, | |
| region_code TEXT NOT NULL REFERENCES dim_region(code), | |
| sku TEXT NOT NULL REFERENCES dim_product(sku), | |
| channel TEXT NOT NULL, | |
| employee_id TEXT NULL REFERENCES dim_employee(emp_id), | |
| units INTEGER NOT NULL, | |
| revenue REAL NOT NULL | |
| ); | |
| CREATE TABLE IF NOT EXISTS inv_stock ( | |
| day TEXT NOT NULL, | |
| region_code TEXT NOT NULL REFERENCES dim_region(code), | |
| sku TEXT NOT NULL REFERENCES dim_product(sku), | |
| on_hand_qty INTEGER NOT NULL, | |
| PRIMARY KEY (day, region_code, sku) | |
| ); | |
| CREATE INDEX IF NOT EXISTS idx_sales_day ON fact_sales(day); | |
| CREATE INDEX IF NOT EXISTS idx_sales_region ON fact_sales(region_code); | |
| CREATE INDEX IF NOT EXISTS idx_sales_sku ON fact_sales(sku); | |
| CREATE INDEX IF NOT EXISTS idx_sales_day_region ON fact_sales(day, region_code); | |
| CREATE INDEX IF NOT EXISTS idx_detail_day_region ON fact_sales_detail(day, region_code); | |
| CREATE INDEX IF NOT EXISTS idx_detail_emp ON fact_sales_detail(employee_id); | |
| CREATE INDEX IF NOT EXISTS idx_stock_day_region ON inv_stock(day, region_code); | |
| """ | |
| ) | |
| existing = set(r[0] for r in cur.execute("SELECT code FROM dim_region")) | |
| to_ins = [(c, n) for c, n in REGIONS if c not in existing] | |
| if to_ins: | |
| cur.executemany("INSERT INTO dim_region(code, name) VALUES (?,?)", to_ins) | |
| existing_sku = set(r[0] for r in cur.execute("SELECT sku FROM dim_product")) | |
| to_ins_p = [p for p in PRODUCTS if p[0] not in existing_sku] | |
| if to_ins_p: | |
| cur.executemany("INSERT INTO dim_product(sku, category, name, price) VALUES (?,?,?,?)", to_ins_p) | |
| emp_count = cur.execute("SELECT COUNT(*) FROM dim_employee").fetchone()[0] | |
| rnd = random.Random(seed) | |
| if emp_count == 0: | |
| rows = [] | |
| for code, _ in REGIONS: | |
| n = 8 | |
| for _ in range(n): | |
| emp_id = f"E{code}{rnd.randint(1000,9999)}" | |
| rows.append((emp_id, _rand_name(rnd), code, rnd.choice(["AE","SE","AM"]), "2023-01-01")) | |
| cur.executemany("INSERT INTO dim_employee(emp_id,name,region_code,role,hire_date) VALUES (?,?,?,?,?)", rows) | |
| n_sales = cur.execute("SELECT COUNT(*) FROM fact_sales").fetchone()[0] | |
| if n_sales == 0: | |
| self._seed_fact_sales(cur, start, end, seed) | |
| n_detail = cur.execute("SELECT COUNT(*) FROM fact_sales_detail").fetchone()[0] | |
| if n_detail == 0: | |
| self._seed_sales_detail(cur, seed) | |
| n_stock = cur.execute("SELECT COUNT(*) FROM inv_stock").fetchone()[0] | |
| if n_stock == 0: | |
| self._seed_stock(cur, start, end, seed) | |
| self.conn.commit() | |
| def _seed_fact_sales(self, cur: sqlite3.Cursor, start: str, end: str, seed: int): | |
| rnd = random.Random(seed) | |
| start_d = date.fromisoformat(start) | |
| end_d = date.fromisoformat(end) | |
| days = (end_d - start_d).days + 1 | |
| region_factor = {"NCR":1.25,"MUM":1.15,"BLR":1.10,"HYD":0.95,"CHN":0.90,"PUN":0.85} | |
| channels = ["Online","Retail"] | |
| batch = [] | |
| for i in range(days): | |
| d = (start_d + timedelta(days=i)).isoformat() | |
| wknd = (start_d + timedelta(days=i)).weekday() >= 5 | |
| wknd_boost = 1.10 if wknd else 1.0 | |
| for code, _name in REGIONS: | |
| rfac = region_factor[code] | |
| for sku, category, _nm, price in PRODUCTS: | |
| for ch in channels: | |
| base = {"Electronics":12,"Apparel":25,"Grocery":40,"Home":9}[category] | |
| ch_mult = 1.15 if ch == "Online" else 0.95 | |
| mu = base * rfac * wknd_boost * ch_mult | |
| sigma = max(1.0, mu * 0.25) | |
| units = max(0, int(rnd.gauss(mu, sigma))) | |
| season = 1.0 + 0.08 * math.sin(i / 3.5) | |
| revenue = round(units * price * season, 2) | |
| batch.append((d, code, sku, ch, units, revenue)) | |
| cur.executemany( | |
| "INSERT INTO fact_sales(day, region_code, sku, channel, units, revenue) VALUES (?,?,?,?,?,?)", | |
| batch | |
| ) | |
| def _seed_sales_detail(self, cur: sqlite3.Cursor, seed: int): | |
| rnd = random.Random(seed+7) | |
| rows = cur.execute( | |
| "SELECT day, region_code, sku, units, revenue FROM fact_sales WHERE channel='Retail'" | |
| ).fetchall() | |
| for d, region, sku, units, revenue in rows: | |
| if units == 0: | |
| continue | |
| emp_ids = [r[0] for r in cur.execute("SELECT emp_id FROM dim_employee WHERE region_code=?", (region,))] | |
| parts = rnd.randint(1, min(4, max(1, units))) | |
| cuts = sorted(rnd.sample(range(1, units), parts-1)) if units > parts else [] | |
| splits = [b-a for a,b in zip([0]+cuts, cuts+[units])] | |
| total_units = float(sum(splits)) | |
| rev_splits = [round(revenue * (u/total_units), 2) for u in splits] | |
| if rev_splits: | |
| drift = round(revenue - sum(rev_splits), 2) | |
| rev_splits[0] += drift | |
| for u, r in zip(splits, rev_splits): | |
| emp = rnd.choice(emp_ids) if emp_ids else None | |
| cur.execute( | |
| "INSERT INTO fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue) " | |
| "VALUES (?,?,?,?,?,?,?)", | |
| (d, region, sku, "Retail", emp, u, r) | |
| ) | |
| def _seed_stock(self, cur: sqlite3.Cursor, start: str, end: str, seed: int): | |
| rnd = random.Random(seed+13) | |
| start_d = date.fromisoformat(start) | |
| end_d = date.fromisoformat(end) | |
| days = (end_d - start_d).days + 1 | |
| for i in range(days): | |
| d = (start_d + timedelta(days=i)).isoformat() | |
| for code, _ in REGIONS: | |
| for sku, category, _nm, _price in PRODUCTS: | |
| base = {"Electronics":400,"Apparel":800,"Grocery":600,"Home":300}[category] | |
| noise = rnd.randint(-30, 30) | |
| on_hand = max(0, base + noise - i*2) | |
| cur.execute( | |
| "INSERT INTO inv_stock(day, region_code, sku, on_hand_qty) VALUES (?,?,?,?)", | |
| (d, code, sku, on_hand) | |
| ) | |
| # ---------------------- helpers + read-only executor ---------------------- | |
| def region_codes(self) -> List[str]: | |
| rows = self.conn.execute("SELECT code FROM dim_region").fetchall() | |
| return [r[0] for r in rows] | |
| def execute_sql_readonly(self, sql: str) -> Dict[str, Any]: | |
| """Hard safety: allow only a single SELECT statement; no comments/CTEs with semicolons.""" | |
| s = sql.strip() | |
| bad = (";", "--", "/*", "*/") | |
| if not s.lower().startswith("select"): | |
| raise ValueError("Only SELECT statements are allowed.") | |
| if any(tok in s for tok in bad): | |
| raise ValueError("Disallowed token in SQL.") | |
| cur = self.conn.cursor() | |
| cur.execute(s) | |
| cols = [d[0] for d in cur.description] if cur.description else [] | |
| rows = cur.fetchall() | |
| return {"columns": cols, "rows": rows, "rowcount": len(rows)} | |
| if __name__ == "__main__": | |
| import argparse | |
| import os | |
| import sys | |
| from pathlib import Path | |
| parser = argparse.ArgumentParser( | |
| description="Create and persist the demo enterprise SQLite database." | |
| ) | |
| parser.add_argument( | |
| "--db", | |
| default=os.getenv("SQLITE_PATH", "./demo_enterprise.sqlite"), | |
| help="Path to the SQLite DB file to create (defaults to ./demo_enterprise.sqlite or $SQLITE_PATH).", | |
| ) | |
| parser.add_argument( | |
| "--start", | |
| default="2025-08-10", | |
| help="Start date (YYYY-MM-DD) for seeding data.", | |
| ) | |
| parser.add_argument( | |
| "--end", | |
| default="2025-09-10", | |
| help="End date (YYYY-MM-DD) for seeding data.", | |
| ) | |
| parser.add_argument( | |
| "--seed", | |
| type=int, | |
| default=42, | |
| help="Random seed used for deterministic seeding.", | |
| ) | |
| args = parser.parse_args() | |
| # Ensure we persist to disk (avoid in-memory DB). | |
| db_path = args.db | |
| if db_path == ":memory:": | |
| print("':memory:' was requested; switching to ./demo_enterprise.sqlite so the DB is saved to disk.") | |
| db_path = "./demo_enterprise.db" | |
| # Make sure the parent directory exists | |
| parent = Path(db_path).expanduser().resolve().parent | |
| parent.mkdir(parents=True, exist_ok=True) | |
| tool = SQLTool(db_path=str(Path(db_path).expanduser())) | |
| tool.setup_demo_enterprise(start=args.start, end=args.end, seed=args.seed) | |
| # Flush and close | |
| tool.conn.commit() | |
| tool.conn.close() | |
| print(f"✅ Database created at: {Path(db_path).expanduser().resolve()}") | |
| print(f" Seed window: {args.start} → {args.end} | seed={args.seed}") | |
| # # Default location ./demo_enterprise.sqlite | |
| # python app/tools/sql_tool.py | |
| # # Custom location and date range | |
| # python BI_Assistant_Backend/app/tools/sql_tool.py --db ./data/retail_demo.sqlite --start 2025-08-01 --end 2025-09-10 --seed 123 | |