Spaces:
Sleeping
Sleeping
| import re | |
| import pandas as pd | |
| MONTHS = { | |
| "january": "01", "february": "02", "march": "03", "april": "04", "may": "05", "june": "06", | |
| "july": "07", "august": "08", "september": "09", "october": "10", "november": "11", "december": "12" | |
| } | |
| def normalize(text: str) -> str: | |
| return re.sub(r"\s+", " ", text.strip().lower()) | |
| def extract_top_n(q: str, default=5): | |
| m = re.search(r"top\s+(\d+)", q) | |
| return int(m.group(1)) if m else default | |
| def month_filter(q: str): | |
| for name, nr in MONTHS.items(): | |
| if name in q: | |
| year_match = re.search(r"(20\d{2})", q) | |
| year = year_match.group(1) if year_match else "2026" | |
| return f"WHERE substr(o.order_date, 1, 7) = '{year}-{nr}'" | |
| return "" | |
| def category_filter(q: str): | |
| categories = ["electronics", "furniture", "accessories", "stationery"] | |
| for c in categories: | |
| if c in q: | |
| return c.title() | |
| return None | |
| def parse_question_to_sql(question: str): | |
| q = normalize(question) | |
| notes = [] | |
| if "schema" in q or "tables" in q: | |
| sql = "SELECT 'customers' AS table_name UNION ALL SELECT 'products' UNION ALL SELECT 'orders' UNION ALL SELECT 'order_items';" | |
| notes.append("Interpreted as a schema discovery question.") | |
| return sql, notes | |
| if "show all customers" in q or ("customers" in q and ("list" in q or "show" in q or q == "customers")): | |
| sql = "SELECT * FROM customers ORDER BY id;" | |
| notes.append("Mapped to a simple customer listing.") | |
| return sql, notes | |
| if ("products" in q or "product" in q) and ("category" in q or "in the" in q or "list" in q or "show" in q): | |
| category = category_filter(q) | |
| if category: | |
| sql = f"SELECT * FROM products WHERE category = '{category}' ORDER BY id;" | |
| notes.append(f"Detected product category filter: {category}.") | |
| return sql, notes | |
| if "orders from" in q or ("orders" in q and any(m in q for m in MONTHS)): | |
| filt = month_filter(q) | |
| sql = f"SELECT o.*, c.name AS customer_name FROM orders o JOIN customers c ON c.id = o.customer_id {filt} ORDER BY o.order_date;" | |
| notes.append("Detected an order date filter.") | |
| return sql, notes | |
| if "delayed" in q and ("how many" in q or "count" in q): | |
| sql = "SELECT COUNT(*) AS delayed_orders FROM orders WHERE status = 'delayed';" | |
| notes.append("Mapped to delayed order count.") | |
| return sql, notes | |
| if "revenue by country" in q or ("total revenue" in q and "country" in q): | |
| sql = """ | |
| SELECT c.country, | |
| ROUND(SUM(oi.quantity * oi.unit_price), 2) AS revenue | |
| FROM order_items oi | |
| JOIN orders o ON o.id = oi.order_id | |
| JOIN customers c ON c.id = o.customer_id | |
| GROUP BY c.country | |
| ORDER BY revenue DESC; | |
| """.strip() | |
| notes.append("Revenue interpreted as SUM(quantity * unit_price).") | |
| return sql, notes | |
| if "revenue by month" in q or "sales by month" in q: | |
| sql = """ | |
| SELECT substr(o.order_date, 1, 7) AS month, | |
| ROUND(SUM(oi.quantity * oi.unit_price), 2) AS revenue | |
| FROM order_items oi | |
| JOIN orders o ON o.id = oi.order_id | |
| GROUP BY substr(o.order_date, 1, 7) | |
| ORDER BY month; | |
| """.strip() | |
| notes.append("Grouped by order month.") | |
| return sql, notes | |
| if "top" in q and "product" in q and "revenue" in q: | |
| n = extract_top_n(q, default=5) | |
| sql = f""" | |
| SELECT p.name, | |
| ROUND(SUM(oi.quantity * oi.unit_price), 2) AS revenue | |
| FROM order_items oi | |
| JOIN products p ON p.id = oi.product_id | |
| GROUP BY p.name | |
| ORDER BY revenue DESC | |
| LIMIT {n}; | |
| """.strip() | |
| notes.append(f"Detected ranking request with top {n}.") | |
| return sql, notes | |
| if "average order value by customer" in q or ("average" in q and "customer" in q and "order value" in q): | |
| sql = """ | |
| WITH order_totals AS ( | |
| SELECT o.id AS order_id, | |
| c.name AS customer_name, | |
| SUM(oi.quantity * oi.unit_price) AS order_total | |
| FROM orders o | |
| JOIN customers c ON c.id = o.customer_id | |
| JOIN order_items oi ON oi.order_id = o.id | |
| GROUP BY o.id, c.name | |
| ) | |
| SELECT customer_name, | |
| ROUND(AVG(order_total), 2) AS avg_order_value | |
| FROM order_totals | |
| GROUP BY customer_name | |
| ORDER BY avg_order_value DESC; | |
| """.strip() | |
| notes.append("Calculated average from per-order totals.") | |
| return sql, notes | |
| if "all orders" in q or ("orders" in q and ("list" in q or "show" in q)): | |
| sql = "SELECT * FROM orders ORDER BY order_date;" | |
| notes.append("Mapped to a full orders listing.") | |
| return sql, notes | |
| sql = "SELECT 'Sorry, I do not understand that question yet.' AS message;" | |
| notes.append("No rule matched; returned a fallback query.") | |
| return sql, notes | |
| def format_answer(question: str, sql: str, df: pd.DataFrame, notes): | |
| preview = df.head(20).to_markdown(index=False) if not df.empty else "(no rows returned)" | |
| explanation = "\n".join(f"- {n}" for n in notes) if notes else "- No special interpretation notes." | |
| return ( | |
| f"I translated your question into this SQL:\n\n" | |
| f"```sql\n{sql}\n```\n\n" | |
| f"Interpretation notes:\n{explanation}\n\n" | |
| f"Result preview ({len(df)} row(s)):\n\n{preview}" | |
| ) | |