# File size: 1,697 Bytes
"""
NeoSyn - query_engine.py
This module processes natural language queries to determine user intent:
- Whether to generate synthetic data
- Whether to summarize data
- Whether to plot distributions
Author: Saivivek Katkuri
Date: June 2025
"""
import re
# Table names recognised in queries, checked in this order (first hit wins).
_KNOWN_TABLES = ("sales", "customers", "products", "transactions")

# Words that can follow a column trigger but are never column names themselves.
_NON_COLUMN_WORDS = frozenset({"plot", "distribution", "of", "the", "a", "an"})

# A whole-word trigger ("plot", "distribution" or "of"), whitespace and an
# optional article.  The following word is captured inside a lookahead so it
# is NOT consumed: it can then act as the next trigger itself, which is what
# lets "plot distribution of price" resolve to "price".
_COLUMN_PATTERN = re.compile(
    r"\b(?:plot|distribution|of)\s+(?:(?:the|an|a)\s+)?(?=(\w+))"
)

# A count followed by a unit, singular or plural ("500 samples", "1 row").
_SAMPLES_PATTERN = re.compile(r"(\d+)\s+(?:samples?|records?|rows?)")


def _extract_column(q: str, table):
    """Return the first plausible column name in lower-cased query *q*, or None."""
    for match in _COLUMN_PATTERN.finditer(q):
        candidate = match.group(1)
        # Skip trigger words/articles, the table that was already detected,
        # and bare numbers (e.g. the "500" in "of 500 rows").
        if (
            candidate in _NON_COLUMN_WORDS
            or candidate == table
            or candidate.isdigit()
        ):
            continue
        return candidate
    return None


def parse_query(query: str) -> dict:
    """
    Parses the user query and extracts intent, table, column, and sample size.

    Matching is a keyword heuristic performed on the lower-cased query.

    Args:
        query (str): User's natural language query.

    Returns:
        dict: Dictionary with the keys:
            "intent": "generate", "summarize", "plot" or "unknown".
            "table": first known table name found in the query, else None.
            "column": word following "plot", "distribution" or "of" that is
                not itself a keyword, an article, the detected table or a
                number; None when there is no such word.
            "num_samples": count preceding "sample(s)", "record(s)" or
                "row(s)"; defaults to 10 when no count is given.
    """
    # Lowercase for easy matching
    q = query.lower()

    # Determine intent; the order of the checks decides ties
    # (e.g. "generate a summary" is a "generate" query).
    if "generate" in q or "synthetic" in q:
        intent = "generate"
    elif "summarize" in q or "summary" in q:
        intent = "summarize"
    elif "plot" in q or "distribution" in q or "chart" in q:
        intent = "plot"
    else:
        intent = "unknown"

    # Extract table name from query (simple substring match for demo)
    table = next((name for name in _KNOWN_TABLES if name in q), None)

    # Extract column name (optional, e.g., "plot price distribution")
    column = _extract_column(q, table)

    # Extract number of samples if mentioned (e.g., "generate 500 samples")
    num_samples = 10  # Default samples
    match_samples = _SAMPLES_PATTERN.search(q)
    if match_samples:
        num_samples = int(match_samples.group(1))

    return {
        "intent": intent,
        "table": table,
        "column": column,
        "num_samples": num_samples,
    }