using Printf

# ==============================================================================
# VSA SQL PARSER + REPL
# Real query language for the Atomic Vector Database
# Not a wrapper — an actual parser that tokenizes, parses, and executes
# ==============================================================================

# --- Token Types ---

# Lexical category attached to every Token produced by `tokenize`.
@enum TokenType begin
    T_KEYWORD     # CREATE, SELECT, INSERT, DELETE, etc.
    T_IDENT       # table/column names
    T_NUMBER      # numeric literals
    T_STRING      # quoted strings
    T_OPERATOR    # =, >, <, >=, <=
    T_COMMA
    T_LPAREN
    T_RPAREN
    T_STAR
    T_SEMICOLON
    T_EOF
end

"""
    Token(type, value)

One lexical unit: its `TokenType` and the text it carries. Keywords are stored
upper-cased, string literals without their surrounding quotes.
"""
struct Token
    type::TokenType
    value::String
end

# --- Lexer ---

# Words recognised as keywords (matched case-insensitively by `tokenize`).
const KEYWORDS = Set(["CREATE", "TABLE", "INSERT", "INTO", "VALUES", "SELECT", "FROM",
                      "WHERE", "DELETE", "VACUUM", "EXPLAIN", "SIMILAR", "TO", "LIMIT",
                      "AND", "SHOW", "TABLES", "DESCRIBE", "DROP", "THERMO", "CAT",
                      "ORD", "INDEX", "ON"])

# Single-character punctuation and the token type each one produces.
const _PUNCTUATION = Dict{Char, TokenType}(
    ',' => T_COMMA,
    '(' => T_LPAREN,
    ')' => T_RPAREN,
    '*' => T_STAR,
    ';' => T_SEMICOLON,
)

"""
    tokenize(sql::String) -> Vector{Token}

Split `sql` into tokens, always terminated by a `T_EOF` token.

Recognises the operators `=`, `>`, `<`, `>=`, `<=`, the punctuation `, ( ) * ;`,
single-quoted strings, numbers (optional leading `-`, at most one decimal point),
and identifiers/keywords. Characters that match none of these are skipped.

# Throws
- `ErrorException` if a single-quoted string is never closed.
"""
function tokenize(sql::String)
    tokens = Token[]
    # Scan a Vector{Char} rather than the String itself: String indices are byte
    # offsets, so `s[i]` with `i` counted in characters breaks on non-ASCII input.
    chars = collect(strip(sql))
    n = length(chars)
    i = 1
    while i <= n
        c = chars[i]
        if isspace(c)
            i += 1
        elseif c == '='
            push!(tokens, Token(T_OPERATOR, "="))
            i += 1
        elseif c == '>' || c == '<'
            # Two-character form (>=, <=) takes precedence over the bare operator.
            if i < n && chars[i + 1] == '='
                push!(tokens, Token(T_OPERATOR, string(c, '=')))
                i += 2
            else
                push!(tokens, Token(T_OPERATOR, string(c)))
                i += 1
            end
        elseif haskey(_PUNCTUATION, c)
            push!(tokens, Token(_PUNCTUATION[c], string(c)))
            i += 1
        elseif c == '\''
            # Quoted string: everything up to the next single quote.
            j = i + 1
            while j <= n && chars[j] != '\''
                j += 1
            end
            j > n && error("Unterminated string literal starting at character $i")
            push!(tokens, Token(T_STRING, String(chars[i + 1:j - 1])))
            i = j + 1
        elseif isdigit(c) || (c == '-' && i < n && isdigit(chars[i + 1]))
            # Number: optional sign, digits, and at most one decimal point.
            j = c == '-' ? i + 1 : i
            seen_dot = false
            while j <= n && (isdigit(chars[j]) || (chars[j] == '.' && !seen_dot))
                seen_dot |= chars[j] == '.'
                j += 1
            end
            push!(tokens, Token(T_NUMBER, String(chars[i:j - 1])))
            i = j
        elseif isletter(c) || c == '_'
            # Identifier or keyword.
            j = i
            while j <= n && (isletter(chars[j]) || isdigit(chars[j]) || chars[j] == '_')
                j += 1
            end
            word = String(chars[i:j - 1])
            upper = uppercase(word)
            if upper in KEYWORDS
                push!(tokens, Token(T_KEYWORD, upper))
            else
                push!(tokens, Token(T_IDENT, word))
            end
            i = j
        else
            i += 1  # Skip unknown
        end
    end
    push!(tokens, Token(T_EOF, ""))
    return tokens
end

# --- AST Nodes ---

# Root of the statement hierarchy; `execute!` dispatches on the concrete subtype.
abstract type SQLStatement end

# CREATE TABLE name (col TYPE(params...), ...)
struct CreateTableStmt <: SQLStatement
    table_name::String
    columns::Vector{Tuple{String, String, Vector{String}}}  # (name, type, params)
end

# INSERT INTO name VALUES (...); values are kept as raw token text.
struct InsertStmt <: SQLStatement
    table_name::String
    values::Vector{String}
end

# SELECT ... FROM name [WHERE field op value] [LIMIT n], or
# SELECT SIMILAR TO id FROM name [LIMIT n] when `is_similar` is true.
# Unused string fields are "".
struct SelectStmt <: SQLStatement
    table_name::String
    where_field::String
    where_op::String
    where_value::String
    limit::Int
    is_similar::Bool
    similar_id::String
end

# DELETE FROM name WHERE <field> = id
struct DeleteStmt <: SQLStatement
    table_name::String
    id::String
end

# VACUUM name
struct VacuumStmt <: SQLStatement
    table_name::String
end

# EXPLAIN <statement>
struct ExplainStmt <: SQLStatement
    inner::SQLStatement
end

# SHOW TABLES
struct ShowTablesStmt <: SQLStatement end

# DESCRIBE name
struct DescribeStmt <: SQLStatement
    table_name::String
end

# DROP TABLE name
struct DropTableStmt <: SQLStatement
    table_name::String
end

# SELECT COUNT(*) FROM name
struct SelectCountStmt <: SQLStatement
    table_name::String
end

# --- Parser ---

"""
    Parser(tokens, pos)

Cursor over a token vector; `pos` is the 1-based index of the next token.
"""
mutable struct Parser
    tokens::Vector{Token}
    pos::Int
end

"""
    peek(p::Parser) -> Token

Return the next token without consuming it, or a `T_EOF` token past the end.
"""
function peek(p::Parser)
    return p.pos <= length(p.tokens) ? p.tokens[p.pos] : Token(T_EOF, "")
end

"""
    advance!(p::Parser) -> Token

Consume and return the next token.
"""
function advance!(p::Parser)
    tok = peek(p)
    p.pos += 1
    return tok
end

"""
    expect!(p::Parser, type::TokenType) -> Token

Consume the next token and return it; raise an error if its type is not `type`.
"""
function expect!(p::Parser, type::TokenType)
    tok = advance!(p)
    tok.type != type && error("Expected $(type), got $(tok.type) '$(tok.value)'")
    return tok
end

"""
    expect_keyword!(p::Parser, kw::String) -> Token

Consume the next token and return it; raise an error unless it is keyword `kw`.
"""
function expect_keyword!(p::Parser, kw::String)
    tok = advance!(p)
    (tok.type != T_KEYWORD || tok.value != kw) &&
        error("Expected keyword '$kw', got '$(tok.value)'")
    return tok
end

# True when the next token is the keyword `kw` (nothing is consumed).
_at_keyword(p::Parser, kw::String) = peek(p).type == T_KEYWORD && peek(p).value == kw

# Consume a token usable as a value (number, string, identifier or keyword).
# Rejects EOF and punctuation so an incomplete clause is reported instead of
# being stored as an empty or bogus value.
function _expect_value!(p::Parser, what::String)
    tok = advance!(p)
    tok.type in (T_NUMBER, T_STRING, T_IDENT, T_KEYWORD) ||
        error("Expected $what, got $(tok.type) '$(tok.value)'")
    return tok
end

# Parse an optional `LIMIT n` clause; returns `default` when the clause is absent.
function _parse_limit!(p::Parser, default::Int)
    _at_keyword(p, "LIMIT") || return default
    advance!(p)
    tok = expect!(p, T_NUMBER)
    lim = tryparse(Int, tok.value)
    if lim === nothing || lim < 0
        error("LIMIT expects a non-negative integer, got '$(tok.value)'")
    end
    return lim
end

"""
    parse_sql(sql::String) -> SQLStatement

Tokenize `sql` and parse one statement, selected by its leading keyword.
Tokens remaining after a complete statement are not inspected.

# Throws
- `ErrorException` on an unknown leading token or a malformed statement.
"""
function parse_sql(sql::String)
    p = Parser(tokenize(sql), 1)
    head = peek(p)
    if head.type == T_KEYWORD
        kw = head.value
        if kw == "CREATE"
            return parse_create(p)
        elseif kw == "INSERT"
            return parse_insert(p)
        elseif kw == "SELECT"
            return parse_select(p)
        elseif kw == "DELETE"
            return parse_delete(p)
        elseif kw == "VACUUM"
            return parse_vacuum(p)
        elseif kw == "EXPLAIN"
            advance!(p)
            return ExplainStmt(parse_sql_from(p))
        elseif kw == "SHOW"
            advance!(p)
            expect_keyword!(p, "TABLES")
            return ShowTablesStmt()
        elseif kw == "DESCRIBE"
            advance!(p)
            return DescribeStmt(expect!(p, T_IDENT).value)
        elseif kw == "DROP"
            advance!(p)
            expect_keyword!(p, "TABLE")
            return DropTableStmt(expect!(p, T_IDENT).value)
        end
    end
    error("Unknown statement starting with '$(head.value)'")
end

"""
    parse_sql_from(p::Parser) -> SQLStatement

Parse the statement that follows `EXPLAIN`; only `SELECT` is accepted.
"""
function parse_sql_from(p::Parser)
    _at_keyword(p, "SELECT") && return parse_select(p)
    error("Expected SELECT after EXPLAIN")
end

"""
    parse_create(p::Parser) -> CreateTableStmt

Parse `CREATE TABLE name (col TYPE[(param, ...)], ...)`.
"""
function parse_create(p::Parser)
    expect_keyword!(p, "CREATE")
    expect_keyword!(p, "TABLE")
    name = expect!(p, T_IDENT)
    expect!(p, T_LPAREN)

    columns = Tuple{String, String, Vector{String}}[]
    while peek(p).type != T_RPAREN && peek(p).type != T_EOF
        col_name = expect!(p, T_IDENT)
        col_type = advance!(p)  # THERMO, CAT, ORD
        # A column type must be a word; punctuation here means it was omitted.
        col_type.type in (T_KEYWORD, T_IDENT) ||
            error("Expected column type after '$(col_name.value)', got '$(col_type.value)'")

        params = String[]
        if peek(p).type == T_LPAREN
            advance!(p)  # (
            while peek(p).type != T_RPAREN && peek(p).type != T_EOF
                tok = advance!(p)
                tok.type != T_COMMA && push!(params, tok.value)
            end
            expect!(p, T_RPAREN)
        end

        push!(columns, (col_name.value, col_type.value, params))
        peek(p).type == T_COMMA && advance!(p)
    end
    expect!(p, T_RPAREN)
    return CreateTableStmt(name.value, columns)
end

"""
    parse_insert(p::Parser) -> InsertStmt

Parse `INSERT INTO name VALUES (v1, v2, ...)`. Every non-comma token between the
parentheses is recorded as a value, using its raw text.
"""
function parse_insert(p::Parser)
    expect_keyword!(p, "INSERT")
    expect_keyword!(p, "INTO")
    name = expect!(p, T_IDENT)
    expect_keyword!(p, "VALUES")
    expect!(p, T_LPAREN)

    values = String[]
    while peek(p).type != T_RPAREN && peek(p).type != T_EOF
        tok = advance!(p)
        tok.type != T_COMMA && push!(values, tok.value)
    end
    expect!(p, T_RPAREN)
    return InsertStmt(name.value, values)
end

"""
    parse_select(p::Parser) -> Union{SelectStmt, SelectCountStmt}

Parse one of:
- `SELECT COUNT(*) FROM name`
- `SELECT SIMILAR TO id FROM name [LIMIT n]`
- `SELECT [*] FROM name [WHERE field op value] [LIMIT n]`

`LIMIT` defaults to 10 and must be a non-negative integer.
"""
function parse_select(p::Parser)
    expect_keyword!(p, "SELECT")

    # COUNT(*) — COUNT is not a keyword, so it arrives as an identifier.
    if peek(p).type == T_IDENT && uppercase(peek(p).value) == "COUNT"
        advance!(p)  # COUNT
        expect!(p, T_LPAREN)
        expect!(p, T_STAR)
        expect!(p, T_RPAREN)
        expect_keyword!(p, "FROM")
        return SelectCountStmt(expect!(p, T_IDENT).value)
    end

    # SIMILAR TO <id>
    if _at_keyword(p, "SIMILAR")
        advance!(p)  # SIMILAR
        expect_keyword!(p, "TO")
        id_tok = _expect_value!(p, "record id after SIMILAR TO")
        expect_keyword!(p, "FROM")
        table = expect!(p, T_IDENT)
        lim = _parse_limit!(p, 10)
        return SelectStmt(table.value, "", "", "", lim, true, id_tok.value)
    end

    # SELECT * or SELECT FROM
    peek(p).type == T_STAR && advance!(p)
    expect_keyword!(p, "FROM")
    table = expect!(p, T_IDENT)

    where_field = ""
    where_op = ""
    where_value = ""
    if _at_keyword(p, "WHERE")
        advance!(p)
        where_field = expect!(p, T_IDENT).value
        where_op = expect!(p, T_OPERATOR).value
        where_value = _expect_value!(p, "value in WHERE clause").value
    end
    lim = _parse_limit!(p, 10)

    return SelectStmt(table.value, where_field, where_op, where_value, lim, false, "")
end

"""
    parse_delete(p::Parser) -> DeleteStmt

Parse `DELETE FROM name WHERE <field> = value`. Only equality is supported; the
field name is consumed but not checked, and `value` is used as the record id.
"""
function parse_delete(p::Parser)
    expect_keyword!(p, "DELETE")
    expect_keyword!(p, "FROM")
    table = expect!(p, T_IDENT)
    expect_keyword!(p, "WHERE")
    # Only support WHERE id = 'value' for now
    expect!(p, T_IDENT)
    op = expect!(p, T_OPERATOR)
    # Any other operator would otherwise be silently treated as equality.
    op.value == "=" || error("DELETE only supports '=', got '$(op.value)'")
    val = _expect_value!(p, "record id in DELETE ... WHERE")
    return DeleteStmt(table.value, val.value)
end

"""
    parse_vacuum(p::Parser) -> VacuumStmt

Parse `VACUUM name`.
"""
function parse_vacuum(p::Parser)
    expect_keyword!(p, "VACUUM")
    return VacuumStmt(expect!(p, T_IDENT).value)
end

# --- Executor ---

# Execution context: the registry handed to encoders and `create_table`, the
# tables created so far keyed by name, and the dimension passed to `create_table`.
mutable struct VSAEngine
    reg::VSARegistry
    tables::Dict{String, VDBTable}
    dim::Int
end

# Create an engine with an empty table set; `dim` defaults to 2048.
function VSAEngine(dim::Int=2048)
    return VSAEngine(VSARegistry(), Dict{String, VDBTable}(), dim)
end

# Map a SQL comparison operator to the symbol handed to the query layer.
# "=" becomes :(==); every other operator keeps its own spelling (>, <, >=, <=).
_comparison_symbol(op::AbstractString) = op == "=" ? :(==) : Symbol(op)

# Interpret raw token text as a Float64 when it parses as one, else keep the text.
_coerce_value(text::AbstractString) = something(tryparse(Float64, text), text)

# Print the plan, the (id, score) rows and the row count of a query result.
function _print_result(result)
    println(" Plan: $(result.plan)")
    println(" Results:")
    for (id, score) in zip(result.ids, result.scores)
        @printf(" %-10s score=%.4f\n", id, score)
    end
    @printf(" %d rows returned.\n", length(result.ids))
    return nothing
end

"""
    execute!(engine::VSAEngine, stmt::CreateTableStmt)

Build an encoder per column and register the new table under `stmt.table_name`.
`THERMO(min, max, levels)` defaults to `(0.0, 100.0, 100)`; `CAT(...)` passes its
parameters to the categorical encoder; any other type gets a categorical encoder
with no parameters.
"""
function execute!(engine::VSAEngine, stmt::CreateTableStmt)
    schema = Tuple{String, VSAEncoder}[]
    for (name, typ, params) in stmt.columns
        enc = if typ == "THERMO"
            min_v = length(params) >= 1 ? parse(Float64, params[1]) : 0.0
            max_v = length(params) >= 2 ? parse(Float64, params[2]) : 100.0
            levels = length(params) >= 3 ? parse(Int, params[3]) : 100
            ThermometerEncoder(engine.reg, name, min_v, max_v; levels=levels)
        elseif typ == "CAT"
            CategoricalEncoder(engine.reg, name, params)
        else
            CategoricalEncoder(engine.reg, name, String[])
        end
        push!(schema, (name, enc))
    end

    table = create_table(engine.reg, stmt.table_name, engine.dim, schema)
    engine.tables[stmt.table_name] = table
    println(" OK. Table '$(stmt.table_name)' created with $(length(schema)) columns.")
end

"""
    execute!(engine::VSAEngine, stmt::InsertStmt)

Insert one record. The first value is the record id; the remaining values are
assigned to the table's columns in order, as `Float64` when they parse as one.
Columns without a corresponding value are left out of the record.
"""
function execute!(engine::VSAEngine, stmt::InsertStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    if length(stmt.values) < 1
        return println(" ERROR: Need at least ID value.")
    end

    id = stmt.values[1]
    fields = Dict{String, Any}()
    for (i, col) in enumerate(table.columns)
        vi = i + 1  # +1 because first value is the ID
        if vi <= length(stmt.values)
            fields[col.name] = _coerce_value(stmt.values[vi])
        end
    end

    vdb_insert!(table, id, fields)
    println(" OK. Inserted '$(id)' into '$(stmt.table_name)'.")
end

"""
    execute!(engine::VSAEngine, stmt::SelectStmt)

Run a similarity query, list record ids (no WHERE clause), or run a filtered
query, and print the outcome.
"""
function execute!(engine::VSAEngine, stmt::SelectStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    if stmt.is_similar
        _print_result(vdb_select_similar(table, stmt.similar_id; top_k=stmt.limit))
        return
    end

    if isempty(stmt.where_field)
        # SELECT * FROM table (show all IDs)
        total = length(table.record_ids)
        n = clamp(stmt.limit, 0, total)
        println(" $total total records (showing $n):")
        for i in 1:n
            println(" $(table.record_ids[i])")
        end
        return
    end

    # Same operator mapping as EXPLAIN, so `>=` / `<=` are no longer silently
    # executed as equality.
    # NOTE(review): assumes vdb_select accepts the operator symbols that
    # vdb_explain is given — confirm against its definition.
    op = _comparison_symbol(stmt.where_op)
    value = _coerce_value(stmt.where_value)
    _print_result(vdb_select(table, stmt.where_field, op, value; top_k=stmt.limit))
end

"""
    execute!(engine::VSAEngine, stmt::DeleteStmt)

Delete the record whose id is `stmt.id` and report the outcome.
"""
function execute!(engine::VSAEngine, stmt::DeleteStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    ok = vdb_delete!(table, stmt.id)
    println(ok ? " OK. Deleted '$(stmt.id)'. $(length(table.records)) records remain." :
                 " ERROR: '$(stmt.id)' not found.")
end

"""
    execute!(engine::VSAEngine, stmt::VacuumStmt)

Call `vdb_vacuum!` on the named table and print the count it returns.
"""
function execute!(engine::VSAEngine, stmt::VacuumStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    n = vdb_vacuum!(table)
    println(" OK. WAL compacted ($n entries). Indices rebuilt.")
end

"""
    execute!(engine::VSAEngine, stmt::ExplainStmt)

Explain a `SELECT ... WHERE` query via `vdb_explain`; any other inner statement
prints a notice instead.
"""
function execute!(engine::VSAEngine, stmt::ExplainStmt)
    inner = stmt.inner
    if inner isa SelectStmt && !isempty(inner.where_field)
        table = get(engine.tables, inner.table_name, nothing)
        table === nothing && return println(" ERROR: Table not found.")
        op = _comparison_symbol(inner.where_op)
        vdb_explain(table, inner.where_field, op, _coerce_value(inner.where_value))
    else
        println(" EXPLAIN only supports SELECT...WHERE queries.")
    end
end

"""
    execute!(engine::VSAEngine, stmt::ShowTablesStmt)

Print one line per table with its record and column counts.
"""
function execute!(engine::VSAEngine, stmt::ShowTablesStmt)
    if isempty(engine.tables)
        println(" No tables.")
    else
        for (name, table) in engine.tables
            @printf(" %-20s %d records, %d columns\n",
                    name, length(table.records), length(table.columns))
        end
    end
end

"""
    execute!(engine::VSAEngine, stmt::DescribeStmt)

Call `vdb_stats` on the named table.
"""
function execute!(engine::VSAEngine, stmt::DescribeStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")
    vdb_stats(table)
end

"""
    execute!(engine::VSAEngine, stmt::DropTableStmt)

Remove the named table from the engine and report how many records it held.
"""
function execute!(engine::VSAEngine, stmt::DropTableStmt)
    if haskey(engine.tables, stmt.table_name)
        n = length(engine.tables[stmt.table_name].records)
        delete!(engine.tables, stmt.table_name)
        println(" OK. Table '$(stmt.table_name)' dropped ($n records removed).")
    else
        println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
end

"""
    execute!(engine::VSAEngine, stmt::SelectCountStmt)

Print the value returned by `vdb_count` for the named table.
"""
function execute!(engine::VSAEngine, stmt::SelectCountStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")
    println(" COUNT(*) = $(vdb_count(table))")
end

# --- Execute SQL String ---

"""
    sql!(engine::VSAEngine, query::String)

Parse and execute `query`, then print the execution time in milliseconds.
Parse and execution errors are printed rather than propagated; an interrupt
is rethrown.
"""
function sql!(engine::VSAEngine, query::String)
    try
        stmt = parse_sql(query)
        t = @elapsed execute!(engine, stmt)
        @printf(" (%.3f ms)\n", t * 1000)
    catch e
        # Let Ctrl-C reach the caller instead of being reported as a SQL error.
        e isa InterruptException && rethrow()
        # showerror prints the message itself, not the exception's struct syntax.
        println(" ERROR: ", sprint(showerror, e))
    end
end

# --- Interactive REPL ---

"""
    repl(engine::VSAEngine)

Read SQL commands from standard input and run each with `sql!` until the user
types `exit` or `quit`, or standard input reaches end-of-file.
"""
function repl(engine::VSAEngine)
    println("VSA Vector Database REPL")
    println("Type SQL commands. Type 'exit' to quit.\n")

    while true
        print("vsa> ")
        flush(stdout)
        # At end-of-file readline() returns "" forever, which would spin this
        # loop; stop instead.
        if eof(stdin)
            println()
            break
        end
        # strip returns a SubString; sql! only accepts String.
        line = String(strip(readline()))
        isempty(line) && continue
        lowercase(line) in ("exit", "quit") && break
        sql!(engine, line)
        println()
    end
    println("Goodbye.")
end