sql-agent-openenv / backend /env /database.py
ar9avg's picture
Initial submission: SQL Agent OpenEnv for Meta+HF hackathon
3c665d2
raw
history blame
20.3 kB
"""
SQLite database setup and schema for the benchmark marketplace.
Tables:
sellers (id, name, email, country, rating)
users (id, name, email, created_at, country)
products (id, name, category, price, stock_quantity, seller_id)
orders (id, user_id, product_id, quantity, total_price, status, created_at)
reviews (id, user_id, product_id, rating, comment, created_at)
~50 rows per table of realistic seed data.
"""
from __future__ import annotations
import os
import sqlite3
from pathlib import Path
from typing import Any
_DATA_DIR = Path(os.environ.get("DATA_DIR", Path(__file__).parent.parent / "data"))
DB_PATH = _DATA_DIR / "benchmark.db"
# ─── Schema ───────────────────────────────────────────────────────
_DDL = """
CREATE TABLE IF NOT EXISTS sellers (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT NOT NULL UNIQUE,
country TEXT NOT NULL,
rating REAL NOT NULL DEFAULT 4.0
);
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
email TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL,
country TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS products (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
category TEXT NOT NULL,
price REAL NOT NULL,
stock_quantity INTEGER NOT NULL DEFAULT 0,
seller_id INTEGER NOT NULL REFERENCES sellers(id)
);
CREATE TABLE IF NOT EXISTS orders (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
product_id INTEGER NOT NULL REFERENCES products(id),
quantity INTEGER NOT NULL DEFAULT 1,
total_price REAL NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
created_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS reviews (
id INTEGER PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
product_id INTEGER NOT NULL REFERENCES products(id),
rating INTEGER NOT NULL CHECK(rating BETWEEN 1 AND 5),
comment TEXT,
created_at TEXT NOT NULL
);
"""
# ─── Seed Data ────────────────────────────────────────────────────
_SELLERS = [
(1, "TechGadgets Inc", "contact@techgadgets.com", "USA", 4.8),
(2, "FashionHub", "info@fashionhub.co.uk", "UK", 4.5),
(3, "HomeDecor Pro", "sales@homedecopro.de", "Germany", 4.3),
(4, "SportZone", "hello@sportzone.fr", "France", 4.6),
(5, "BookWorld", "support@bookworld.ca", "Canada", 4.9),
(6, "ElectroMart", "contact@electromart.jp", "Japan", 4.7),
(7, "GreenGrocer", "team@greengrocer.au", "Australia", 4.4),
(8, "KidsToys Hub", "info@kidstoys.us", "USA", 4.2),
(9, "PetSupplies Co", "hello@petsupplies.nl", "Netherlands", 4.6),
(10, "OfficeSupply Plus", "contact@officesupply.sg", "Singapore", 4.1),
]
_USERS = [
(1, "Alice Johnson", "alice@example.com", "2023-01-15", "USA"),
(2, "Bob Smith", "bob@example.com", "2023-02-10", "UK"),
(3, "Carol White", "carol@example.com", "2023-03-05", "Canada"),
(4, "David Brown", "david@example.com", "2023-03-20", "Germany"),
(5, "Emma Davis", "emma@example.com", "2023-04-12", "France"),
(6, "Frank Miller", "frank@example.com", "2023-05-01", "Australia"),
(7, "Grace Wilson", "grace@example.com", "2023-05-18", "Japan"),
(8, "Henry Taylor", "henry@example.com", "2023-06-03", "USA"),
(9, "Isabella Anderson", "isabella@example.com", "2023-06-25", "UK"),
(10, "Jack Martinez", "jack@example.com", "2023-07-09", "Spain"),
(11, "Karen Thomas", "karen@example.com", "2023-07-22", "Italy"),
(12, "Liam Jackson", "liam@example.com", "2023-08-04", "Brazil"),
(13, "Mia Harris", "mia@example.com", "2023-08-17", "Canada"),
(14, "Noah Martin", "noah@example.com", "2023-09-01", "USA"),
(15, "Olivia Garcia", "olivia@example.com", "2023-09-14", "Mexico"),
(16, "Paul Robinson", "paul@example.com", "2023-10-02", "Australia"),
(17, "Quinn Lewis", "quinn@example.com", "2023-10-20", "New Zealand"),
(18, "Rachel Walker", "rachel@example.com", "2023-11-05", "UK"),
(19, "Sam Hall", "sam@example.com", "2023-11-19", "USA"),
(20, "Tina Allen", "tina@example.com", "2023-12-01", "Germany"),
(21, "Umar Young", "umar@example.com", "2024-01-08", "Pakistan"),
(22, "Vera Hernandez", "vera@example.com", "2024-01-22", "Spain"),
(23, "Will King", "will@example.com", "2024-02-06", "USA"),
(24, "Xena Wright", "xena@example.com", "2024-02-20", "Canada"),
(25, "Yusuf Lopez", "yusuf@example.com", "2024-03-05", "Morocco"),
(26, "Zoe Hill", "zoe@example.com", "2024-03-19", "UK"),
(27, "Aaron Scott", "aaron@example.com", "2024-04-02", "USA"),
(28, "Bella Green", "bella@example.com", "2024-04-16", "Australia"),
(29, "Carlos Adams", "carlos@example.com", "2024-05-01", "Brazil"),
(30, "Diana Baker", "diana@example.com", "2024-05-15", "Canada"),
(31, "Ethan Gonzalez", "ethan@example.com", "2024-05-29", "USA"),
(32, "Fatima Nelson", "fatima@example.com", "2024-06-12", "Nigeria"),
(33, "George Carter", "george@example.com", "2024-06-26", "UK"),
(34, "Hannah Mitchell", "hannah@example.com", "2024-07-10", "Germany"),
(35, "Ivan Perez", "ivan@example.com", "2024-07-24", "Russia"),
(36, "Julia Roberts", "juliar@example.com", "2024-08-07", "USA"),
(37, "Kevin Turner", "kevin@example.com", "2024-08-21", "Canada"),
(38, "Luna Phillips", "luna@example.com", "2024-09-04", "France"),
(39, "Mike Campbell", "mike@example.com", "2024-09-18", "USA"),
(40, "Nancy Parker", "nancy@example.com", "2024-10-02", "Japan"),
(41, "Oscar Evans", "oscar@example.com", "2024-10-16", "UK"),
(42, "Penny Edwards", "penny@example.com", "2024-10-30", "Australia"),
(43, "Roy Collins", "roy@example.com", "2024-11-13", "USA"),
(44, "Sara Stewart", "sara@example.com", "2024-11-27", "Canada"),
(45, "Tom Morris", "tom@example.com", "2024-12-11", "UK"),
(46, "Uma Rogers", "uma@example.com", "2024-12-25", "India"),
(47, "Victor Reed", "victor@example.com", "2025-01-08", "USA"),
(48, "Wendy Cook", "wendy@example.com", "2025-01-22", "Germany"),
(49, "Xavier Morgan", "xavier@example.com", "2025-02-05", "France"),
(50, "Yasmin Bell", "yasmin@example.com", "2025-02-19", "UK"),
]
_PRODUCTS = [
(1, "Wireless Headphones Pro", "Electronics", 149.99, 120, 1),
(2, "Laptop Stand Adjustable", "Electronics", 49.99, 200, 1),
(3, "USB-C Hub 7-in-1", "Electronics", 39.99, 350, 6),
(4, "Mechanical Keyboard RGB", "Electronics", 89.99, 85, 6),
(5, "Webcam 4K Ultra", "Electronics", 129.99, 60, 1),
(6, "Summer Floral Dress", "Fashion", 59.99, 180, 2),
(7, "Men Slim Fit Chinos", "Fashion", 44.99, 220, 2),
(8, "Leather Wallet Bifold", "Fashion", 34.99, 300, 2),
(9, "Running Shoes Ultralight", "Fashion", 109.99, 95, 4),
(10, "Yoga Pants High Waist", "Fashion", 54.99, 150, 4),
(11, "Ceramic Vase Set", "Home & Garden", 79.99, 70, 3),
(12, "Bamboo Cutting Board", "Home & Garden", 29.99, 400, 3),
(13, "Scented Candle Collection", "Home & Garden", 24.99, 500, 3),
(14, "Smart LED Bulb Pack", "Home & Garden", 59.99, 250, 1),
(15, "Coffee Table Book Stand", "Home & Garden", 49.99, 130, 3),
(16, "Protein Powder Vanilla", "Sports & Fitness", 54.99, 210, 4),
(17, "Resistance Band Set", "Sports & Fitness", 24.99, 600, 4),
(18, "Yoga Mat Non-Slip", "Sports & Fitness", 39.99, 300, 4),
(19, "Tennis Racket Pro", "Sports & Fitness", 89.99, 45, 4),
(20, "Water Bottle Insulated", "Sports & Fitness", 29.99, 450, 7),
(21, "The Python Handbook", "Books", 29.99, 200, 5),
(22, "Machine Learning Basics", "Books", 34.99, 175, 5),
(23, "Data Structures Guide", "Books", 27.99, 220, 5),
(24, "Mystery Novel Collection", "Books", 49.99, 100, 5),
(25, "Children Story Box Set", "Books", 44.99, 130, 8),
(26, "Dog Bed Orthopedic", "Pet Supplies", 79.99, 90, 9),
(27, "Cat Scratching Post", "Pet Supplies", 34.99, 170, 9),
(28, "Fish Tank Starter Kit", "Pet Supplies", 59.99, 55, 9),
(29, "Bird Cage Deluxe", "Pet Supplies", 89.99, 35, 9),
(30, "Pet Grooming Kit", "Pet Supplies", 39.99, 140, 9),
(31, "LEGO City Set 600pcs", "Toys", 69.99, 80, 8),
(32, "Remote Control Car", "Toys", 49.99, 120, 8),
(33, "Board Game Strategy", "Toys", 34.99, 200, 8),
(34, "Puzzle 1000 Pieces", "Toys", 24.99, 350, 8),
(35, "Art & Craft Kit Kids", "Toys", 29.99, 280, 8),
(36, "Office Desk Organizer", "Office", 39.99, 300, 10),
(37, "Wireless Mouse Ergonomic", "Electronics", 59.99, 200, 6),
(38, "Notebook Set Premium", "Office", 19.99, 600, 10),
(39, "Sticky Notes Colorful", "Office", 9.99, 800, 10),
(40, "Printer Paper Ream", "Office", 14.99, 500, 10),
(41, "Smart Watch Fitness", "Electronics", 199.99, 75, 1),
(42, "Blender High Power", "Home & Garden", 89.99, 110, 3),
(43, "Air Purifier HEPA", "Home & Garden", 149.99, 65, 1),
(44, "Backpack Waterproof", "Fashion", 79.99, 160, 2),
(45, "Sunglasses Polarized", "Fashion", 69.99, 200, 2),
(46, "Dumbbells Set 20kg", "Sports & Fitness", 79.99, 85, 4),
(47, "Jump Rope Speed", "Sports & Fitness", 19.99, 400, 4),
(48, "Graphic Novel Bundle", "Books", 59.99, 90, 5),
(49, "Phone Stand Adjustable", "Electronics", 24.99, 350, 6),
(50, "Desk Lamp LED", "Office", 44.99, 230, 10),
]
_ORDERS = [
(1, 1, 1, 1, 149.99, "delivered", "2024-01-10"),
(2, 2, 6, 2, 119.98, "delivered", "2024-01-15"),
(3, 3, 21, 1, 29.99, "delivered", "2024-01-20"),
(4, 4, 11, 1, 79.99, "delivered", "2024-01-25"),
(5, 5, 16, 2, 109.98, "delivered", "2024-02-01"),
(6, 6, 31, 1, 69.99, "delivered", "2024-02-05"),
(7, 7, 3, 2, 79.98, "shipped", "2024-02-10"),
(8, 8, 41, 1, 199.99, "delivered", "2024-02-14"),
(9, 9, 26, 1, 79.99, "delivered", "2024-02-18"),
(10, 10, 17, 3, 74.97, "delivered", "2024-02-22"),
(11, 11, 22, 1, 34.99, "delivered", "2024-03-01"),
(12, 12, 7, 1, 44.99, "delivered", "2024-03-05"),
(13, 13, 18, 2, 79.98, "delivered", "2024-03-10"),
(14, 14, 37, 1, 59.99, "shipped", "2024-03-14"),
(15, 15, 44, 1, 79.99, "delivered", "2024-03-18"),
(16, 16, 2, 1, 49.99, "delivered", "2024-03-22"),
(17, 17, 50, 1, 44.99, "pending", "2024-03-26"),
(18, 18, 5, 1, 129.99, "delivered", "2024-04-01"),
(19, 19, 12, 2, 59.98, "delivered", "2024-04-05"),
(20, 20, 33, 1, 34.99, "delivered", "2024-04-09"),
(21, 21, 9, 1, 109.99, "delivered", "2024-04-13"),
(22, 22, 14, 2, 119.98, "delivered", "2024-04-17"),
(23, 23, 43, 1, 149.99, "shipped", "2024-04-21"),
(24, 24, 25, 1, 44.99, "delivered", "2024-04-25"),
(25, 25, 8, 2, 69.98, "delivered", "2024-04-29"),
(26, 26, 4, 1, 89.99, "delivered", "2024-05-03"),
(27, 27, 29, 1, 89.99, "delivered", "2024-05-07"),
(28, 28, 20, 3, 89.97, "delivered", "2024-05-11"),
(29, 29, 35, 2, 59.98, "delivered", "2024-05-15"),
(30, 30, 46, 1, 79.99, "pending", "2024-05-19"),
(31, 31, 13, 5, 124.95, "delivered", "2024-05-23"),
(32, 32, 36, 2, 79.98, "delivered", "2024-05-27"),
(33, 33, 48, 1, 59.99, "delivered", "2024-05-31"),
(34, 34, 1, 1, 149.99, "delivered", "2024-06-04"),
(35, 35, 24, 1, 49.99, "delivered", "2024-06-08"),
(36, 36, 10, 2, 109.98, "shipped", "2024-06-12"),
(37, 37, 42, 1, 89.99, "delivered", "2024-06-16"),
(38, 38, 27, 1, 34.99, "delivered", "2024-06-20"),
(39, 39, 6, 1, 59.99, "delivered", "2024-06-24"),
(40, 40, 41, 1, 199.99, "delivered", "2024-06-28"),
(41, 41, 19, 1, 89.99, "cancelled", "2024-07-02"),
(42, 42, 34, 2, 49.98, "delivered", "2024-07-06"),
(43, 43, 23, 1, 27.99, "delivered", "2024-07-10"),
(44, 44, 47, 3, 59.97, "delivered", "2024-07-14"),
(45, 45, 15, 1, 49.99, "delivered", "2024-07-18"),
(46, 46, 32, 1, 49.99, "delivered", "2024-07-22"),
(47, 47, 3, 1, 39.99, "pending", "2024-07-26"),
(48, 48, 28, 1, 59.99, "delivered", "2024-07-30"),
(49, 49, 39, 10, 99.90, "delivered", "2024-08-03"),
(50, 50, 21, 2, 59.98, "delivered", "2024-08-07"),
]
_REVIEWS = [
(1, 1, 1, 5, "Excellent headphones, crystal clear sound!", "2024-01-15"),
(2, 2, 6, 4, "Beautiful dress, fits perfectly.", "2024-01-20"),
(3, 3, 21, 5, "Best Python book for beginners.", "2024-01-25"),
(4, 4, 11, 4, "Very elegant vase set.", "2024-01-30"),
(5, 5, 16, 3, "Decent protein powder, average taste.", "2024-02-05"),
(6, 6, 31, 5, "My kid loves this LEGO set!", "2024-02-10"),
(7, 7, 3, 5, "Incredibly useful USB hub.", "2024-02-15"),
(8, 8, 41, 5, "Smart watch exceeded expectations.", "2024-02-20"),
(9, 9, 26, 4, "Dog loves the orthopedic bed.", "2024-02-25"),
(10, 10, 17, 5, "Great resistance bands, very durable.", "2024-03-01"),
(11, 11, 22, 4, "Solid ML intro book.", "2024-03-06"),
(12, 12, 7, 3, "Chinos are OK, sizing runs small.", "2024-03-11"),
(13, 13, 18, 5, "Perfect yoga mat, non-slip is great.", "2024-03-16"),
(14, 14, 37, 4, "Smooth wireless mouse.", "2024-03-21"),
(15, 15, 44, 5, "Waterproof backpack is amazing.", "2024-03-26"),
(16, 16, 2, 4, "Laptop stand is sturdy and adjustable.", "2024-03-31"),
(17, 17, 49, 3, "Decent phone stand but wobbly.", "2024-04-05"),
(18, 18, 5, 5, "Best webcam I've ever used.", "2024-04-10"),
(19, 19, 12, 5, "Bamboo cutting board is beautiful.", "2024-04-15"),
(20, 20, 33, 4, "Fun strategy board game.", "2024-04-20"),
(21, 21, 9, 5, "Running shoes are so comfortable!", "2024-04-25"),
(22, 22, 14, 4, "Smart bulbs work well with app.", "2024-04-30"),
(23, 23, 43, 4, "Air purifier is quiet and effective.", "2024-05-05"),
(24, 24, 25, 5, "Beautiful story box set for kids.", "2024-05-10"),
(25, 25, 8, 4, "Leather wallet is high quality.", "2024-05-15"),
(26, 26, 4, 5, "Mechanical keyboard is a joy to type on.", "2024-05-20"),
(27, 27, 29, 4, "Bird cage is spacious and well-made.", "2024-05-25"),
(28, 28, 20, 5, "Water bottle keeps drinks cold all day.", "2024-05-30"),
(29, 29, 35, 4, "Great art kit for kids.", "2024-06-04"),
(30, 30, 46, 4, "Solid dumbbells, good grip.", "2024-06-09"),
(31, 1, 13, 5, "Scented candles smell amazing.", "2024-06-14"),
(32, 2, 36, 4, "Desk organizer keeps my workspace tidy.", "2024-06-19"),
(33, 3, 48, 5, "Graphic novel bundle is worth every penny.", "2024-06-24"),
(34, 4, 1, 4, "Good headphones, comfy for long sessions.", "2024-06-29"),
(35, 5, 24, 5, "Love these mystery novels!", "2024-07-04"),
(36, 6, 10, 4, "High waist yoga pants are flattering.", "2024-07-09"),
(37, 7, 42, 4, "Powerful blender, handles frozen fruit.", "2024-07-14"),
(38, 8, 27, 5, "Cat scratching post is well built.", "2024-07-19"),
(39, 9, 6, 4, "Floral dress is as pictured.", "2024-07-24"),
(40, 10, 41, 5, "Smart watch has excellent battery life.", "2024-07-29"),
(41, 11, 19, 2, "Tennis racket feels cheap for the price.", "2024-08-03"),
(42, 12, 34, 5, "Puzzle is a perfect family activity.", "2024-08-08"),
(43, 13, 23, 5, "Data structures book is very clear.", "2024-08-13"),
(44, 14, 47, 4, "Jump rope is fast and durable.", "2024-08-18"),
(45, 15, 15, 3, "Book stand is okay, a bit light.", "2024-08-23"),
(46, 16, 32, 5, "Remote control car is very fast!", "2024-08-28"),
(47, 17, 3, 4, "USB hub works great on MacBook.", "2024-09-02"),
(48, 18, 28, 4, "Fish tank kit is easy to set up.", "2024-09-07"),
(49, 19, 38, 5, "Premium notebook has great paper.", "2024-09-12"),
(50, 20, 21, 5, "Python handbook is my go-to reference.", "2024-09-17"),
]
# ─── Public API ───────────────────────────────────────────────────
def get_db_path() -> Path:
return DB_PATH
def ensure_seeded() -> bool:
"""
Create the database and populate seed data if not already done.
Returns True if seed was needed (first run), False if already seeded.
"""
_DATA_DIR.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
try:
conn.executescript(_DDL)
conn.commit()
count = conn.execute("SELECT COUNT(*) FROM users").fetchone()[0]
if count >= 50:
return False # Already seeded
conn.execute("DELETE FROM reviews")
conn.execute("DELETE FROM orders")
conn.execute("DELETE FROM products")
conn.execute("DELETE FROM users")
conn.execute("DELETE FROM sellers")
conn.executemany(
"INSERT OR REPLACE INTO sellers VALUES (?,?,?,?,?)", _SELLERS
)
conn.executemany(
"INSERT OR REPLACE INTO users VALUES (?,?,?,?,?)", _USERS
)
conn.executemany(
"INSERT OR REPLACE INTO products VALUES (?,?,?,?,?,?)", _PRODUCTS
)
conn.executemany(
"INSERT OR REPLACE INTO orders VALUES (?,?,?,?,?,?,?)", _ORDERS
)
conn.executemany(
"INSERT OR REPLACE INTO reviews VALUES (?,?,?,?,?,?)", _REVIEWS
)
conn.commit()
return True
finally:
conn.close()
def get_schema_info() -> str:
"""
Return a concise textual schema summary for use in prompts.
"""
conn = sqlite3.connect(str(DB_PATH))
try:
lines = []
for table in ["sellers", "users", "products", "orders", "reviews"]:
info = conn.execute(f"PRAGMA table_info({table})").fetchall()
cols = ", ".join(
f"{col[1]} {col[2]}{'(PK)' if col[5] else ''}"
for col in info
)
row_count = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
lines.append(f"Table: {table} ({row_count} rows)\n Columns: {cols}")
return "\n\n".join(lines)
finally:
conn.close()
def execute_query(sql: str) -> tuple[list[dict], str | None]:
"""
Execute a SQL query and return (rows, error_message).
rows is a list of dicts; error_message is None on success.
"""
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
try:
cursor = conn.execute(sql)
rows = [dict(row) for row in cursor.fetchall()]
return rows, None
except sqlite3.Error as e:
return [], str(e)
finally:
conn.close()
def get_table_stats() -> list[dict]:
"""Return [{name, rows}, ...] for all tables."""
conn = sqlite3.connect(str(DB_PATH))
try:
tables = ["sellers", "users", "products", "orders", "reviews"]
return [
{
"name": t,
"rows": conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0],
}
for t in tables
]
finally:
conn.close()
def get_schema_graph() -> dict:
"""Return schema graph with tables, columns, and foreign keys."""
conn = sqlite3.connect(str(DB_PATH))
try:
tables = []
for table in ["sellers", "users", "products", "orders", "reviews"]:
info = conn.execute(f"PRAGMA table_info({table})").fetchall()
columns = [
{"name": col[1], "type": col[2], "pk": bool(col[5])}
for col in info
]
tables.append({"name": table, "columns": columns})
foreign_keys = []
for table in ["sellers", "users", "products", "orders", "reviews"]:
fks = conn.execute(f"PRAGMA foreign_key_list({table})").fetchall()
for fk in fks:
foreign_keys.append(
{
"from_table": table,
"from_col": fk[3],
"to_table": fk[2],
"to_col": fk[4],
}
)
return {"tables": tables, "foreign_keys": foreign_keys}
finally:
conn.close()