"""
NeoSyn - query_engine.py

This module processes natural language queries to determine user intent:
- Whether to generate synthetic data
- Whether to summarize data
- Whether to plot distributions

Author: Saivivek Katkuri
Date: June 2025
"""
import re
# Table names the parser recognises; the first one found in the query wins.
_KNOWN_TABLES = ("sales", "customers", "products", "transactions")

# Words that may follow a column cue but are never a column name themselves:
# the cue/intent keywords plus common articles and prepositions.
_NON_COLUMN_WORDS = frozenset({
    "generate", "synthetic", "summarize", "summary",
    "plot", "distribution", "chart",
    "of", "the", "a", "an", "for", "in", "from", "on", "to",
})

# A cue word ("plot", "distribution", "of"), an optional article, and then a
# lookahead capture of the next word.  The lookahead keeps that word
# unconsumed so it can itself act as the next cue ("plot distribution of x").
_COLUMN_CUE_RE = re.compile(
    r"\b(?:plot|distribution|of)\s+(?:(?:the|a|an)\s+)?(?=(\w+))"
)

# A count such as "500" or "1,000" followed by a singular or plural unit.
_SAMPLE_COUNT_RE = re.compile(r"(\d[\d,]*)\s+(?:samples?|records?|rows?)\b")


def parse_query(query: str) -> dict:
    """
    Parses the user query and extracts intent, table, column, and sample size.

    Matching is case-insensitive. Intent and table are detected by simple
    substring checks; the column is the first word following a "plot",
    "distribution" or "of" cue that is not a stop word, an intent keyword,
    a number, or the detected table name.

    Args:
        query (str): User's natural language query.

    Returns:
        dict: Dictionary with keys:
            - "intent": "generate", "summarize", "plot" or "unknown".
            - "table": matched table name, or None if no known table appears.
            - "column": extracted column name, or None if none was found.
            - "num_samples": requested count (defaults to 10).
    """
    q = query.lower()

    # Determine intent; earlier branches take priority when several match.
    if "generate" in q or "synthetic" in q:
        intent = "generate"
    elif "summarize" in q or "summary" in q:
        intent = "summarize"
    elif "plot" in q or "distribution" in q or "chart" in q:
        intent = "plot"
    else:
        intent = "unknown"

    # Extract table name from query (simple substring match).
    table = next((name for name in _KNOWN_TABLES if name in q), None)

    # Extract column name (optional, e.g., "plot price distribution").
    # Scan every cue instead of stopping at the first, so phrases like
    # "distribution of price" yield "price" rather than "of".
    column = None
    for cue in _COLUMN_CUE_RE.finditer(q):
        candidate = cue.group(1)
        if candidate in _NON_COLUMN_WORDS or candidate.isdigit():
            continue
        if candidate == table:
            # "summary of sales" names the table, not a column.
            continue
        column = candidate
        break

    # Extract number of samples if mentioned (e.g., "generate 500 samples").
    num_samples = 10  # Default samples
    count = _SAMPLE_COUNT_RE.search(q)
    if count:
        # Drop thousands separators so "1,000" parses as 1000, not 0.
        num_samples = int(count.group(1).replace(",", ""))

    return {
        "intent": intent,
        "table": table,
        "column": column,
        "num_samples": num_samples,
    }