File size: 13,047 Bytes
bebb279
 
6f18489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2cef617
bebb279
 
 
 
 
2cef617
6f18489
bebb279
 
 
 
 
6f18489
bebb279
 
 
 
6f18489
 
2cef617
 
bebb279
2cef617
 
 
 
6f18489
 
2cef617
6f18489
 
 
 
 
 
 
 
 
 
 
 
bebb279
 
 
 
 
 
 
6f18489
 
 
 
 
 
 
 
 
bebb279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f18489
bebb279
 
 
 
6f18489
 
 
 
 
 
 
 
 
 
 
 
 
bebb279
 
 
 
 
bd7254f
bebb279
 
 
 
 
6f18489
 
bebb279
 
 
 
 
 
 
 
 
6f18489
2cef617
 
bebb279
6f18489
 
 
 
bebb279
 
6f18489
 
 
 
 
 
 
 
 
 
bebb279
6f18489
 
 
 
 
bebb279
 
6f18489
 
 
 
 
bebb279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd7254f
bebb279
 
 
 
 
 
6f18489
 
 
 
bebb279
 
 
 
 
 
 
 
 
 
 
 
 
95bb394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# app/tools/sql_tool.py
import os, sqlite3, random, math, string
from datetime import date, timedelta
from typing import Optional, Dict, Any, List, Tuple

REGIONS: List[Tuple[str, str]] = [
    ("NCR", "Delhi NCR"),
    ("BLR", "Bengaluru"),
    ("MUM", "Mumbai"),
    ("HYD", "Hyderabad"),
    ("CHN", "Chennai"),
    ("PUN", "Pune"),
]

PRODUCTS: List[Tuple[str, str, str, float]] = [
    ("ELEC-001", "Electronics", "Smartphone Alpha", 29999.0),
    ("ELEC-002", "Electronics", "Smartphone Pro",   49999.0),
    ("ELEC-003", "Electronics", "Laptop 14\"",      65999.0),
    ("ELEC-004", "Electronics", "Earbuds",           3999.0),
    ("APP-001",  "Apparel",     "Athleisure Tee",     999.0),
    ("APP-002",  "Apparel",     "Formal Shirt",      1599.0),
    ("APP-003",  "Apparel",     "Denim Jeans",       2499.0),
    ("GROC-001", "Grocery",     "Olive Oil 1L",       799.0),
    ("GROC-002", "Grocery",     "Basmati 5kg",       899.0),
    ("GROC-003", "Grocery",     "Almonds 1kg",      1199.0),
    ("HOME-001", "Home",        "Mixer Grinder",     3499.0),
    ("HOME-002", "Home",        "Air Fryer",         6999.0),
]

def _rand_name(rnd: random.Random):
    first = ["Ramesh","Suresh","Mahesh","Amit","Priya","Anita","Kiran","Sunil","Neha","Pooja","Ravi","Vijay","Anil","Meera","Tarun"]
    last  = ["Kumar","Sharma","Patel","Verma","Reddy","Iyer","Das","Ghosh","Yadav","Gupta","Singh","Menon"]
    return f"{rnd.choice(first)} {rnd.choice(last)}"

class SQLTool:
    """
    Enterprise-ish SQLite schema with dims/facts and safe read-only SQL execution.

      dim_region(code, name)
      dim_product(sku, category, name, price)
      dim_employee(emp_id, name, region_code, role, hire_date)

      fact_sales(day, region_code, sku, channel, units, revenue)          -- daily aggregates
      fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue)  -- retail split by employee

      inv_stock(day, region_code, sku, on_hand_qty)
    """

    def __init__(self, db_path: Optional[str] = None):
        path = db_path or os.getenv("SQLITE_PATH", ":memory:")
        self.path = path
        self.conn = sqlite3.connect(path, check_same_thread=False)
        self.conn.execute("PRAGMA journal_mode=WAL;")
        self.conn.execute("PRAGMA synchronous=NORMAL;")

    # ---------------------- schema & seed ----------------------
    def setup_demo_enterprise(self, start: str = "2025-08-10", end: str = "2025-09-10", seed: int = 42):
        cur = self.conn.cursor()
        cur.executescript(
            """
            CREATE TABLE IF NOT EXISTS dim_region (
              code TEXT PRIMARY KEY,
              name TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS dim_product (
              sku TEXT PRIMARY KEY,
              category TEXT NOT NULL,
              name TEXT NOT NULL,
              price REAL NOT NULL
            );
            CREATE TABLE IF NOT EXISTS dim_employee (
              emp_id TEXT PRIMARY KEY,
              name TEXT NOT NULL,
              region_code TEXT NOT NULL REFERENCES dim_region(code),
              role TEXT NOT NULL,
              hire_date TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS fact_sales (
              day TEXT NOT NULL,
              region_code TEXT NOT NULL REFERENCES dim_region(code),
              sku TEXT NOT NULL REFERENCES dim_product(sku),
              channel TEXT NOT NULL,
              units INTEGER NOT NULL,
              revenue REAL NOT NULL,
              PRIMARY KEY (day, region_code, sku, channel)
            );
            CREATE TABLE IF NOT EXISTS fact_sales_detail (
              day TEXT NOT NULL,
              region_code TEXT NOT NULL REFERENCES dim_region(code),
              sku TEXT NOT NULL REFERENCES dim_product(sku),
              channel TEXT NOT NULL,
              employee_id TEXT NULL REFERENCES dim_employee(emp_id),
              units INTEGER NOT NULL,
              revenue REAL NOT NULL
            );
            CREATE TABLE IF NOT EXISTS inv_stock (
              day TEXT NOT NULL,
              region_code TEXT NOT NULL REFERENCES dim_region(code),
              sku TEXT NOT NULL REFERENCES dim_product(sku),
              on_hand_qty INTEGER NOT NULL,
              PRIMARY KEY (day, region_code, sku)
            );

            CREATE INDEX IF NOT EXISTS idx_sales_day          ON fact_sales(day);
            CREATE INDEX IF NOT EXISTS idx_sales_region       ON fact_sales(region_code);
            CREATE INDEX IF NOT EXISTS idx_sales_sku          ON fact_sales(sku);
            CREATE INDEX IF NOT EXISTS idx_sales_day_region   ON fact_sales(day, region_code);

            CREATE INDEX IF NOT EXISTS idx_detail_day_region  ON fact_sales_detail(day, region_code);
            CREATE INDEX IF NOT EXISTS idx_detail_emp         ON fact_sales_detail(employee_id);

            CREATE INDEX IF NOT EXISTS idx_stock_day_region   ON inv_stock(day, region_code);
            """
        )

        existing = set(r[0] for r in cur.execute("SELECT code FROM dim_region"))
        to_ins = [(c, n) for c, n in REGIONS if c not in existing]
        if to_ins:
            cur.executemany("INSERT INTO dim_region(code, name) VALUES (?,?)", to_ins)

        existing_sku = set(r[0] for r in cur.execute("SELECT sku FROM dim_product"))
        to_ins_p = [p for p in PRODUCTS if p[0] not in existing_sku]
        if to_ins_p:
            cur.executemany("INSERT INTO dim_product(sku, category, name, price) VALUES (?,?,?,?)", to_ins_p)

        emp_count = cur.execute("SELECT COUNT(*) FROM dim_employee").fetchone()[0]
        rnd = random.Random(seed)
        if emp_count == 0:
            rows = []
            for code, _ in REGIONS:
                n = 8
                for _ in range(n):
                    emp_id = f"E{code}{rnd.randint(1000,9999)}"
                    rows.append((emp_id, _rand_name(rnd), code, rnd.choice(["AE","SE","AM"]), "2023-01-01"))
            cur.executemany("INSERT INTO dim_employee(emp_id,name,region_code,role,hire_date) VALUES (?,?,?,?,?)", rows)

        n_sales = cur.execute("SELECT COUNT(*) FROM fact_sales").fetchone()[0]
        if n_sales == 0:
            self._seed_fact_sales(cur, start, end, seed)

        n_detail = cur.execute("SELECT COUNT(*) FROM fact_sales_detail").fetchone()[0]
        if n_detail == 0:
            self._seed_sales_detail(cur, seed)

        n_stock = cur.execute("SELECT COUNT(*) FROM inv_stock").fetchone()[0]
        if n_stock == 0:
            self._seed_stock(cur, start, end, seed)

        self.conn.commit()

    def _seed_fact_sales(self, cur: sqlite3.Cursor, start: str, end: str, seed: int):
        rnd = random.Random(seed)
        start_d = date.fromisoformat(start)
        end_d = date.fromisoformat(end)
        days = (end_d - start_d).days + 1
        region_factor = {"NCR":1.25,"MUM":1.15,"BLR":1.10,"HYD":0.95,"CHN":0.90,"PUN":0.85}
        channels = ["Online","Retail"]

        batch = []
        for i in range(days):
            d = (start_d + timedelta(days=i)).isoformat()
            wknd = (start_d + timedelta(days=i)).weekday() >= 5
            wknd_boost = 1.10 if wknd else 1.0
            for code, _name in REGIONS:
                rfac = region_factor[code]
                for sku, category, _nm, price in PRODUCTS:
                    for ch in channels:
                        base = {"Electronics":12,"Apparel":25,"Grocery":40,"Home":9}[category]
                        ch_mult = 1.15 if ch == "Online" else 0.95
                        mu = base * rfac * wknd_boost * ch_mult
                        sigma = max(1.0, mu * 0.25)
                        units = max(0, int(rnd.gauss(mu, sigma)))
                        season = 1.0 + 0.08 * math.sin(i / 3.5)
                        revenue = round(units * price * season, 2)
                        batch.append((d, code, sku, ch, units, revenue))
        cur.executemany(
            "INSERT INTO fact_sales(day, region_code, sku, channel, units, revenue) VALUES (?,?,?,?,?,?)",
            batch
        )

    def _seed_sales_detail(self, cur: sqlite3.Cursor, seed: int):
        rnd = random.Random(seed+7)
        rows = cur.execute(
            "SELECT day, region_code, sku, units, revenue FROM fact_sales WHERE channel='Retail'"
        ).fetchall()

        for d, region, sku, units, revenue in rows:
            if units == 0:
                continue
            emp_ids = [r[0] for r in cur.execute("SELECT emp_id FROM dim_employee WHERE region_code=?", (region,))]
            parts = rnd.randint(1, min(4, max(1, units)))
            cuts = sorted(rnd.sample(range(1, units), parts-1)) if units > parts else []
            splits = [b-a for a,b in zip([0]+cuts, cuts+[units])]
            total_units = float(sum(splits))
            rev_splits = [round(revenue * (u/total_units), 2) for u in splits]
            if rev_splits:
                drift = round(revenue - sum(rev_splits), 2)
                rev_splits[0] += drift
            for u, r in zip(splits, rev_splits):
                emp = rnd.choice(emp_ids) if emp_ids else None
                cur.execute(
                    "INSERT INTO fact_sales_detail(day, region_code, sku, channel, employee_id, units, revenue) "
                    "VALUES (?,?,?,?,?,?,?)",
                    (d, region, sku, "Retail", emp, u, r)
                )

    def _seed_stock(self, cur: sqlite3.Cursor, start: str, end: str, seed: int):
        rnd = random.Random(seed+13)
        start_d = date.fromisoformat(start)
        end_d = date.fromisoformat(end)
        days = (end_d - start_d).days + 1

        for i in range(days):
            d = (start_d + timedelta(days=i)).isoformat()
            for code, _ in REGIONS:
                for sku, category, _nm, _price in PRODUCTS:
                    base = {"Electronics":400,"Apparel":800,"Grocery":600,"Home":300}[category]
                    noise = rnd.randint(-30, 30)
                    on_hand = max(0, base + noise - i*2)
                    cur.execute(
                        "INSERT INTO inv_stock(day, region_code, sku, on_hand_qty) VALUES (?,?,?,?)",
                        (d, code, sku, on_hand)
                    )

    # ---------------------- helpers + read-only executor ----------------------
    def region_codes(self) -> List[str]:
        rows = self.conn.execute("SELECT code FROM dim_region").fetchall()
        return [r[0] for r in rows]

    def execute_sql_readonly(self, sql: str) -> Dict[str, Any]:
        """Hard safety: allow only a single SELECT statement; no comments/CTEs with semicolons."""
        s = sql.strip()
        bad = (";", "--", "/*", "*/")
        if not s.lower().startswith("select"):
            raise ValueError("Only SELECT statements are allowed.")
        if any(tok in s for tok in bad):
            raise ValueError("Disallowed token in SQL.")
        cur = self.conn.cursor()
        cur.execute(s)
        cols = [d[0] for d in cur.description] if cur.description else []
        rows = cur.fetchall()
        return {"columns": cols, "rows": rows, "rowcount": len(rows)}

if __name__ == "__main__":
    import argparse
    import os
    import sys
    from pathlib import Path

    parser = argparse.ArgumentParser(
        description="Create and persist the demo enterprise SQLite database."
    )
    parser.add_argument(
        "--db",
        default=os.getenv("SQLITE_PATH", "./demo_enterprise.sqlite"),
        help="Path to the SQLite DB file to create (defaults to ./demo_enterprise.sqlite or $SQLITE_PATH).",
    )
    parser.add_argument(
        "--start",
        default="2025-08-10",
        help="Start date (YYYY-MM-DD) for seeding data.",
    )
    parser.add_argument(
        "--end",
        default="2025-09-10",
        help="End date (YYYY-MM-DD) for seeding data.",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed used for deterministic seeding.",
    )
    args = parser.parse_args()

    # Ensure we persist to disk (avoid in-memory DB).
    db_path = args.db
    if db_path == ":memory:":
        print("':memory:' was requested; switching to ./demo_enterprise.sqlite so the DB is saved to disk.")
        db_path = "./demo_enterprise.db"

    # Make sure the parent directory exists
    parent = Path(db_path).expanduser().resolve().parent
    parent.mkdir(parents=True, exist_ok=True)

    tool = SQLTool(db_path=str(Path(db_path).expanduser()))
    tool.setup_demo_enterprise(start=args.start, end=args.end, seed=args.seed)

    # Flush and close
    tool.conn.commit()
    tool.conn.close()

    print(f"✅ Database created at: {Path(db_path).expanduser().resolve()}")
    print(f"   Seed window: {args.start}{args.end} | seed={args.seed}")

# # Default location ./demo_enterprise.sqlite
# python app/tools/sql_tool.py

# # Custom location and date range
# python BI_Assistant_Backend/app/tools/sql_tool.py --db ./data/retail_demo.sqlite --start 2025-08-01 --end 2025-09-10 --seed 123