""" NeoSyn - query_engine.py This module processes natural language queries to determine user intent: - Whether to generate synthetic data - Whether to summarize data - Whether to plot distributions Author: Saivivek Katkuri Date: June 2025 """ import re def parse_query(query: str) -> dict: """ Parses the user query and extracts intent, table, column, and sample size. Args: query (str): User's natural language query. Returns: dict: Dictionary with intent, table, column (if any), and num_samples. """ intent = None table = None column = None num_samples = 10 # Default samples # Lowercase for easy matching q = query.lower() # Determine intent if "generate" in q or "synthetic" in q: intent = "generate" elif "summarize" in q or "summary" in q: intent = "summarize" elif "plot" in q or "distribution" in q or "chart" in q: intent = "plot" else: intent = "unknown" # Extract table name from query (simple match for demo) for t in ["sales", "customers", "products", "transactions"]: if t in q: table = t break # Extract column name (optional, e.g., "plot price distribution") match = re.search(r"(plot|distribution|of)\s+(\w+)", q) if match: column = match.group(2) # Extract number of samples if mentioned (e.g., "generate 500 samples") match_samples = re.search(r"(\d+)\s+(samples|records|rows)", q) if match_samples: num_samples = int(match_samples.group(1)) return { "intent": intent, "table": table, "column": column, "num_samples": num_samples }