Spaces:
Sleeping
Sleeping
| using Printf | |
| # ============================================================================== | |
| # VSA SQL PARSER + REPL | |
| # Real query language for the Atomic Vector Database | |
| # Not a wrapper — an actual parser that tokenizes, parses, and executes | |
| # ============================================================================== | |
| # --- Token Types --- | |
"""
    TokenType

Lexical category of a [`Token`](@ref) produced by [`tokenize`](@ref).
"""
@enum TokenType begin
    T_KEYWORD    # CREATE, SELECT, INSERT, DELETE, etc.
    T_IDENT      # table/column names
    T_NUMBER     # numeric literals
    T_STRING     # quoted strings
    T_OPERATOR   # =, >, <, >=, <=
    T_COMMA
    T_LPAREN
    T_RPAREN
    T_STAR
    T_SEMICOLON
    T_EOF
end

"""
    Token(type, value)

One lexical unit: its category and the text it carries. Keywords are stored
upper-cased, string literals without their quotes, and `T_EOF` with an empty
value.
"""
struct Token
    type::TokenType
    value::String
end

# --- Lexer ---

# Words recognised as keywords (matched case-insensitively by `tokenize`).
const KEYWORDS = Set(["CREATE", "TABLE", "INSERT", "INTO", "VALUES",
                      "SELECT", "FROM", "WHERE", "DELETE", "VACUUM",
                      "EXPLAIN", "SIMILAR", "TO", "LIMIT", "AND",
                      "SHOW", "TABLES", "DESCRIBE", "DROP",
                      "THERMO", "CAT", "ORD", "INDEX", "ON"])

# Single-character punctuation and the token type each one maps to.
const _PUNCTUATION = Dict{Char, TokenType}(
    ',' => T_COMMA,
    '(' => T_LPAREN,
    ')' => T_RPAREN,
    '*' => T_STAR,
    ';' => T_SEMICOLON,
)

"""
    tokenize(sql::AbstractString) -> Vector{Token}

Split `sql` into tokens, always terminated by a single `T_EOF` token.

Whitespace separates tokens and unrecognised characters are skipped. Words
listed in `KEYWORDS` (case-insensitive) become upper-cased `T_KEYWORD` tokens;
any other word is a `T_IDENT` with its original spelling. Single-quoted text
becomes a `T_STRING` without the quotes. A run of digits and dots, optionally
preceded by `-`, becomes a `T_NUMBER`.

# Throws
- `ErrorException` if a single-quoted string is never closed.
"""
function tokenize(sql::AbstractString)
    # Scan a Vector{Char} rather than the string itself: string indices are
    # byte offsets, so pairing `length(s)` with `s[i]` breaks on non-ASCII text.
    chars = collect(strip(sql))
    n = length(chars)
    tokens = Token[]
    i = 1
    while i <= n
        c = chars[i]
        if isspace(c)
            i += 1
        elseif (c == '>' || c == '<') && i < n && chars[i + 1] == '='
            # Two-character comparison operators must be tried before > and <.
            push!(tokens, Token(T_OPERATOR, string(c, '=')))
            i += 2
        elseif c == '=' || c == '>' || c == '<'
            push!(tokens, Token(T_OPERATOR, string(c)))
            i += 1
        elseif haskey(_PUNCTUATION, c)
            push!(tokens, Token(_PUNCTUATION[c], string(c)))
            i += 1
        elseif c == '\''
            # Quoted string: everything up to the next single quote.
            j = i + 1
            while j <= n && chars[j] != '\''
                j += 1
            end
            j > n && error("Unterminated string literal (missing closing ')")
            push!(tokens, Token(T_STRING, String(chars[i+1:j-1])))
            i = j + 1
        elseif isdigit(c) || (c == '-' && i < n && isdigit(chars[i + 1]))
            # Number: optional leading minus, then digits and dots.
            j = c == '-' ? i + 1 : i
            while j <= n && (isdigit(chars[j]) || chars[j] == '.')
                j += 1
            end
            push!(tokens, Token(T_NUMBER, String(chars[i:j-1])))
            i = j
        elseif isletter(c) || c == '_'
            # Identifier or keyword.
            j = i
            while j <= n && (isletter(chars[j]) || isdigit(chars[j]) || chars[j] == '_')
                j += 1
            end
            word = String(chars[i:j-1])
            upper = uppercase(word)
            if upper in KEYWORDS
                push!(tokens, Token(T_KEYWORD, upper))
            else
                push!(tokens, Token(T_IDENT, word))
            end
            i = j
        else
            i += 1  # Skip unknown characters
        end
    end
    push!(tokens, Token(T_EOF, ""))
    return tokens
end
| # --- AST Nodes --- | |
"""
    SQLStatement

Supertype of every parsed statement; `execute!` has one method per subtype.
"""
abstract type SQLStatement end

"""
`CREATE TABLE`: the table name plus one `(name, type, params)` tuple per column.
"""
struct CreateTableStmt <: SQLStatement
    table_name::String
    columns::Vector{Tuple{String, String, Vector{String}}}  # (name, type, params)
end

"""
`INSERT INTO ... VALUES (...)`: the raw value texts in the order written.
"""
struct InsertStmt <: SQLStatement
    table_name::String
    values::Vector{String}
end

"""
`SELECT`: either a `WHERE field op value` filter (the three `where_*` fields,
empty strings when there is no WHERE clause) or, when `is_similar` is true, a
`SIMILAR TO` query against `similar_id`. `limit` caps the number of rows.
"""
struct SelectStmt <: SQLStatement
    table_name::String
    where_field::String
    where_op::String
    where_value::String
    limit::Int
    is_similar::Bool
    similar_id::String
end

"""
`DELETE FROM ... WHERE ...`: the table and the value text naming the record.
"""
struct DeleteStmt <: SQLStatement
    table_name::String
    id::String
end

"""
`VACUUM <table>`.
"""
struct VacuumStmt <: SQLStatement
    table_name::String
end

"""
`EXPLAIN <statement>`: wraps the statement to be explained.
"""
struct ExplainStmt <: SQLStatement
    inner::SQLStatement
end

"""
`SHOW TABLES`.
"""
struct ShowTablesStmt <: SQLStatement end

"""
`DESCRIBE <table>`.
"""
struct DescribeStmt <: SQLStatement
    table_name::String
end

"""
`DROP TABLE <table>`.
"""
struct DropTableStmt <: SQLStatement
    table_name::String
end

"""
`SELECT COUNT(*) FROM <table>`.
"""
struct SelectCountStmt <: SQLStatement
    table_name::String
end
| # --- Parser --- | |
"""
    Parser(tokens, pos)

Cursor over a token stream: `tokens` is the vector being read and `pos` is the
1-based index of the next token to be handed out.
"""
mutable struct Parser
    tokens::Vector{Token}
    pos::Int
end
"""
    peek(p::Parser) -> Token

Return the token at the cursor without consuming it. Once the cursor has moved
past the last token, a synthetic `T_EOF` token is returned instead.
"""
function peek(p::Parser)
    if p.pos > length(p.tokens)
        return Token(T_EOF, "")
    end
    return p.tokens[p.pos]
end
"""
    advance!(p::Parser) -> Token

Consume and return the token at the cursor. The cursor always moves forward by
one, even when it is already past the end (in which case `T_EOF` is returned).
"""
function advance!(p::Parser)
    idx = p.pos
    # Same lookup rule as `peek`: past the end yields a synthetic EOF token.
    current = idx <= length(p.tokens) ? p.tokens[idx] : Token(T_EOF, "")
    p.pos = idx + 1
    return current
end
"""
    expect!(p::Parser, type::TokenType) -> Token

Consume the next token and return it, raising an error if it is not of the
requested `type`. The token is consumed even when the check fails.
"""
function expect!(p::Parser, type::TokenType)
    tok = advance!(p)
    if tok.type != type
        error("Expected $(type), got $(tok.type) '$(tok.value)'")
    end
    return tok
end
"""
    expect_keyword!(p::Parser, kw::String) -> Token

Consume the next token and return it, raising an error unless it is the
keyword `kw`. Keywords are stored upper-cased, so `kw` must be upper case.
"""
function expect_keyword!(p::Parser, kw::String)
    tok = advance!(p)
    if !(tok.type == T_KEYWORD && tok.value == kw)
        error("Expected keyword '$kw', got '$(tok.value)'")
    end
    return tok
end
# Require that nothing but optional semicolons remains after a statement.
function _expect_statement_end!(p::Parser)
    while peek(p).type == T_SEMICOLON
        advance!(p)
    end
    leftover = peek(p)
    leftover.type == T_EOF ||
        error("Unexpected '$(leftover.value)' after end of statement")
    return nothing
end

"""
    parse_sql(sql::String) -> SQLStatement

Tokenize `sql` and parse it into a statement node, dispatching on the leading
keyword (CREATE, INSERT, SELECT, DELETE, VACUUM, EXPLAIN, SHOW, DESCRIBE, DROP).

After the statement, only trailing semicolons may remain. Anything else (an
unsupported `AND ...` clause, a second statement, stray text) is rejected
rather than silently dropped, so the statement that runs is always the one
that was written.

# Throws
- `ErrorException` for an unknown leading token, a malformed statement, or
  leftover tokens after the statement.
"""
function parse_sql(sql::String)
    p = Parser(tokenize(sql), 1)
    head = peek(p)
    head.type == T_KEYWORD ||
        error("Unknown statement starting with '$(head.value)'")

    kw = head.value
    stmt = if kw == "CREATE"
        parse_create(p)
    elseif kw == "INSERT"
        parse_insert(p)
    elseif kw == "SELECT"
        parse_select(p)
    elseif kw == "DELETE"
        parse_delete(p)
    elseif kw == "VACUUM"
        parse_vacuum(p)
    elseif kw == "EXPLAIN"
        advance!(p)
        ExplainStmt(parse_sql_from(p))
    elseif kw == "SHOW"
        advance!(p)
        expect_keyword!(p, "TABLES")
        ShowTablesStmt()
    elseif kw == "DESCRIBE"
        advance!(p)
        DescribeStmt(expect!(p, T_IDENT).value)
    elseif kw == "DROP"
        advance!(p)
        expect_keyword!(p, "TABLE")
        DropTableStmt(expect!(p, T_IDENT).value)
    else
        error("Unknown statement starting with '$(head.value)'")
    end

    _expect_statement_end!(p)
    return stmt
end
"""
    parse_sql_from(p::Parser) -> SQLStatement

Parse the statement that follows `EXPLAIN`. Only `SELECT` is accepted; any
other next token raises an error.
"""
function parse_sql_from(p::Parser)
    peek(p).value == "SELECT" || error("Expected SELECT after EXPLAIN")
    return parse_select(p)
end
"""
    parse_create(p::Parser) -> CreateTableStmt

Parse `CREATE TABLE name (col TYPE[(param, ...)], ...)`.

Each column yields a `(name, type, params)` tuple, where `params` holds the
texts of the tokens found inside the optional parentheses after the type
(commas excluded). The type must be a word (keyword or identifier).
Punctuation in that position is rejected, so a forgotten type such as
`(a, b CAT)` is reported instead of creating a column whose type is `","`.

# Throws
- `ErrorException` on any structural error or a non-word column type.
"""
function parse_create(p::Parser)
    expect_keyword!(p, "CREATE")
    expect_keyword!(p, "TABLE")
    name = expect!(p, T_IDENT)
    expect!(p, T_LPAREN)

    columns = Tuple{String, String, Vector{String}}[]
    while peek(p).type != T_RPAREN && peek(p).type != T_EOF
        col_name = expect!(p, T_IDENT)
        col_type = advance!(p)  # THERMO, CAT, ORD
        if col_type.type != T_KEYWORD && col_type.type != T_IDENT
            error("Expected a type for column '$(col_name.value)', got '$(col_type.value)'")
        end

        params = String[]
        if peek(p).type == T_LPAREN
            advance!(p)  # (
            while peek(p).type != T_RPAREN && peek(p).type != T_EOF
                tok = advance!(p)
                if tok.type != T_COMMA
                    push!(params, tok.value)
                end
            end
            expect!(p, T_RPAREN)
        end

        push!(columns, (col_name.value, col_type.value, params))
        if peek(p).type == T_COMMA
            advance!(p)
        end
    end
    expect!(p, T_RPAREN)
    return CreateTableStmt(name.value, columns)
end
"""
    parse_insert(p::Parser) -> InsertStmt

Parse `INSERT INTO name VALUES (v1, v2, ...)`. The text of every token between
the parentheses, commas excluded, is collected in order.
"""
function parse_insert(p::Parser)
    expect_keyword!(p, "INSERT")
    expect_keyword!(p, "INTO")
    target = expect!(p, T_IDENT)
    expect_keyword!(p, "VALUES")
    expect!(p, T_LPAREN)

    values = String[]
    while true
        nxt = peek(p)
        (nxt.type == T_RPAREN || nxt.type == T_EOF) && break
        advance!(p)
        nxt.type == T_COMMA || push!(values, nxt.value)
    end

    expect!(p, T_RPAREN)
    return InsertStmt(target.value, values)
end
# Consume an optional `LIMIT <number>` clause; return `default` when absent.
function _parse_optional_limit!(p::Parser, default::Int)
    nxt = peek(p)
    if !(nxt.type == T_KEYWORD && nxt.value == "LIMIT")
        return default
    end
    advance!(p)
    return parse(Int, expect!(p, T_NUMBER).value)
end

"""
    parse_select(p::Parser) -> Union{SelectStmt, SelectCountStmt}

Parse one of the three supported SELECT forms:

- `SELECT COUNT(*) FROM table` gives a `SelectCountStmt`.
- `SELECT SIMILAR TO id FROM table [LIMIT n]` gives a `SelectStmt` with
  `is_similar = true`.
- `SELECT [*] FROM table [WHERE field op value] [LIMIT n]` gives a plain
  `SelectStmt`.

The limit defaults to 10 when no LIMIT clause is present.
"""
function parse_select(p::Parser)
    expect_keyword!(p, "SELECT")
    head = peek(p)

    # COUNT(*)
    if head.type == T_IDENT && uppercase(head.value) == "COUNT"
        advance!(p)
        expect!(p, T_LPAREN)
        expect!(p, T_STAR)
        expect!(p, T_RPAREN)
        expect_keyword!(p, "FROM")
        return SelectCountStmt(expect!(p, T_IDENT).value)
    end

    # SIMILAR TO
    if head.type == T_KEYWORD && head.value == "SIMILAR"
        advance!(p)
        expect_keyword!(p, "TO")
        target = advance!(p)
        expect_keyword!(p, "FROM")
        source = expect!(p, T_IDENT)
        lim = _parse_optional_limit!(p, 10)
        return SelectStmt(source.value, "", "", "", lim, true, target.value)
    end

    # SELECT * or SELECT FROM
    if head.type == T_STAR
        advance!(p)
    end
    expect_keyword!(p, "FROM")
    source = expect!(p, T_IDENT)

    where_field = ""
    where_op = ""
    where_value = ""
    nxt = peek(p)
    if nxt.type == T_KEYWORD && nxt.value == "WHERE"
        advance!(p)
        where_field = expect!(p, T_IDENT).value
        where_op = expect!(p, T_OPERATOR).value
        where_value = advance!(p).value
    end

    lim = _parse_optional_limit!(p, 10)
    return SelectStmt(source.value, where_field, where_op, where_value, lim, false, "")
end
"""
    parse_delete(p::Parser) -> DeleteStmt

Parse `DELETE FROM table WHERE id = value`.

Deletion is by record ID only, so the WHERE clause must name the field `id`
(any letter case) and use the `=` operator. Any other field or operator is
rejected: otherwise a query such as `WHERE age > 30` would delete the record
whose ID happens to be "30".

# Throws
- `ErrorException` if the clause is not of the form `id = <value>`.
"""
function parse_delete(p::Parser)
    expect_keyword!(p, "DELETE")
    expect_keyword!(p, "FROM")
    table = expect!(p, T_IDENT)
    expect_keyword!(p, "WHERE")

    # Only support WHERE id = 'value' for now
    field = expect!(p, T_IDENT)
    lowercase(field.value) == "id" ||
        error("DELETE only supports WHERE id = <value>, got field '$(field.value)'")

    op = expect!(p, T_OPERATOR)
    op.value == "=" ||
        error("DELETE only supports the '=' operator, got '$(op.value)'")

    val = advance!(p)
    val.type in (T_STRING, T_NUMBER, T_IDENT) ||
        error("Expected a value after '=', got '$(val.value)'")

    return DeleteStmt(table.value, val.value)
end
"""
    parse_vacuum(p::Parser) -> VacuumStmt

Parse `VACUUM table`.
"""
function parse_vacuum(p::Parser)
    expect_keyword!(p, "VACUUM")
    return VacuumStmt(expect!(p, T_IDENT).value)
end
| # --- Executor --- | |
"""
    VSAEngine(reg, tables, dim)
    VSAEngine(dim::Int=2048)

State shared by all executed statements: the registry handed to encoders and
`create_table`, the tables created so far keyed by name, and the dimension
passed to `create_table` for every new table.

The one-argument form starts with a fresh registry and no tables.
"""
mutable struct VSAEngine
    reg::VSARegistry
    tables::Dict{String, VDBTable}
    dim::Int
end

VSAEngine(dim::Int=2048) = VSAEngine(VSARegistry(), Dict{String, VDBTable}(), dim)
"""
    execute!(engine::VSAEngine, stmt::CreateTableStmt)

Build an encoder for every declared column, create the table and register it
in `engine.tables`.

Column types:
- `THERMO(min, max, levels)` gives a `ThermometerEncoder`; missing parameters
  default to `0.0`, `100.0` and `100`.
- `CAT(a, b, ...)` gives a `CategoricalEncoder` over the listed parameters.
- anything else gives a `CategoricalEncoder` with no categories.

Creating a table whose name is already in use is refused with an error
message, so an existing table and its records are never silently replaced.
"""
function execute!(engine::VSAEngine, stmt::CreateTableStmt)
    if haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' already exists.")
    end

    schema = Tuple{String, VSAEncoder}[]
    for (name, typ, params) in stmt.columns
        enc = if typ == "THERMO"
            min_v = length(params) >= 1 ? parse(Float64, params[1]) : 0.0
            max_v = length(params) >= 2 ? parse(Float64, params[2]) : 100.0
            levels = length(params) >= 3 ? parse(Int, params[3]) : 100
            ThermometerEncoder(engine.reg, name, min_v, max_v; levels=levels)
        elseif typ == "CAT"
            CategoricalEncoder(engine.reg, name, params)
        else
            # NOTE(review): ORD is a keyword but lands here, and its params are
            # dropped. Confirm whether a dedicated ordinal encoder is intended.
            CategoricalEncoder(engine.reg, name, String[])
        end
        push!(schema, (name, enc))
    end

    table = create_table(engine.reg, stmt.table_name, engine.dim, schema)
    engine.tables[stmt.table_name] = table
    println(" OK. Table '$(stmt.table_name)' created with $(length(schema)) columns.")
end
"""
    execute!(engine::VSAEngine, stmt::InsertStmt)

Insert one record. The first value is the record ID; the remaining values are
matched to the table's columns in order. A value that parses as `Float64` is
stored as a number, anything else as the original text. Columns without a
matching value are left out of the record, and surplus values are ignored.
"""
function execute!(engine::VSAEngine, stmt::InsertStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    if table === nothing
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    if isempty(stmt.values)
        return println(" ERROR: Need at least ID value.")
    end

    id = stmt.values[1]
    fields = Dict{String, Any}()
    # Pair each column with the value after the ID; zip stops at the shorter side.
    for (col, raw) in zip(table.columns, Iterators.drop(stmt.values, 1))
        number = tryparse(Float64, raw)
        fields[col.name] = number === nothing ? raw : number
    end

    vdb_insert!(table, id, fields)
    println(" OK. Inserted '$(id)' into '$(stmt.table_name)'.")
end
# Print the plan, the (id, score) rows and the row count of a query result.
function _print_query_result(result)
    println(" Plan: $(result.plan)")
    println(" Results:")
    for (id, score) in zip(result.ids, result.scores)
        @printf(" %-10s score=%.4f\n", id, score)
    end
    @printf(" %d rows returned.\n", length(result.ids))
    return nothing
end

"""
    execute!(engine::VSAEngine, stmt::SelectStmt)

Run a SELECT and print its results.

- `SIMILAR TO` queries are forwarded to `vdb_select_similar`.
- Without a WHERE clause, the first `limit` record IDs are listed.
- With a WHERE clause, the filter is forwarded to `vdb_select`. The value is
  passed as `Float64` when it parses as one, otherwise as text.

Only `=`, `>` and `<` are accepted in WHERE. The lexer also produces `>=` and
`<=`, but these are reported as unsupported here instead of being silently run
as an equality test.
"""
function execute!(engine::VSAEngine, stmt::SelectStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    if stmt.is_similar
        result = vdb_select_similar(table, stmt.similar_id; top_k=stmt.limit)
        return _print_query_result(result)
    end

    if isempty(stmt.where_field)
        # SELECT * FROM table (show all IDs). Clamp so that a negative LIMIT
        # shows zero rows rather than reporting a negative count.
        n = clamp(stmt.limit, 0, length(table.record_ids))
        println(" $(length(table.record_ids)) total records (showing $n):")
        for i in 1:n
            println(" $(table.record_ids[i])")
        end
        return
    end

    op = stmt.where_op == "=" ? :(==) :
         stmt.where_op == ">" ? :(>) :
         stmt.where_op == "<" ? :(<) : nothing
    if op === nothing
        return println(" ERROR: Operator '$(stmt.where_op)' is not supported in WHERE.")
    end

    # Determine value type
    number = tryparse(Float64, stmt.where_value)
    value = number !== nothing ? number : stmt.where_value

    result = vdb_select(table, stmt.where_field, op, value; top_k=stmt.limit)
    return _print_query_result(result)
end
"""
    execute!(engine::VSAEngine, stmt::DeleteStmt)

Delete the record with ID `stmt.id` via `vdb_delete!` and report whether it
was found.
"""
function execute!(engine::VSAEngine, stmt::DeleteStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    if table === nothing
        println(" ERROR: Table '$(stmt.table_name)' not found.")
        return
    end

    if vdb_delete!(table, stmt.id)
        println(" OK. Deleted '$(stmt.id)'. $(length(table.records)) records remain.")
    else
        println(" ERROR: '$(stmt.id)' not found.")
    end
end
"""
    execute!(engine::VSAEngine, stmt::VacuumStmt)

Run `vdb_vacuum!` on the named table and print the count it returns.
"""
function execute!(engine::VSAEngine, stmt::VacuumStmt)
    if !haskey(engine.tables, stmt.table_name)
        println(" ERROR: Table '$(stmt.table_name)' not found.")
        return
    end

    n = vdb_vacuum!(engine.tables[stmt.table_name])
    println(" OK. WAL compacted ($n entries). Indices rebuilt.")
end
"""
    execute!(engine::VSAEngine, stmt::ExplainStmt)

Explain a `SELECT ... WHERE` query by forwarding its filter to `vdb_explain`.
Any other inner statement, including a SELECT without WHERE, only prints a
notice. `=` is passed on as `==`; other operators are passed on as the symbol
of their own text.
"""
function execute!(engine::VSAEngine, stmt::ExplainStmt)
    query = stmt.inner
    if !(query isa SelectStmt) || isempty(query.where_field)
        println(" EXPLAIN only supports SELECT...WHERE queries.")
        return
    end

    table = get(engine.tables, query.table_name, nothing)
    if table === nothing
        return println(" ERROR: Table not found.")
    end

    number = tryparse(Float64, query.where_value)
    value = number === nothing ? query.where_value : number
    op = query.where_op == "=" ? :(==) : Symbol(query.where_op)
    return vdb_explain(table, query.where_field, op, value)
end
"""
    execute!(engine::VSAEngine, stmt::ShowTablesStmt)

Print one line per table with its record and column counts, or a notice when
the engine has no tables.
"""
function execute!(engine::VSAEngine, stmt::ShowTablesStmt)
    if isempty(engine.tables)
        println(" No tables.")
        return
    end

    for (name, table) in engine.tables
        @printf(" %-20s %d records, %d columns\n", name, length(table.records), length(table.columns))
    end
end
"""
    execute!(engine::VSAEngine, stmt::DescribeStmt)

Call `vdb_stats` on the named table, or print an error if it does not exist.
"""
function execute!(engine::VSAEngine, stmt::DescribeStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    return vdb_stats(engine.tables[stmt.table_name])
end
"""
    execute!(engine::VSAEngine, stmt::DropTableStmt)

Remove the named table from the engine and report how many records it held,
or print an error if it does not exist.
"""
function execute!(engine::VSAEngine, stmt::DropTableStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    if table === nothing
        println(" ERROR: Table '$(stmt.table_name)' not found.")
        return
    end

    # Count before removing so the message reflects what was dropped.
    n = length(table.records)
    delete!(engine.tables, stmt.table_name)
    println(" OK. Table '$(stmt.table_name)' dropped ($n records removed).")
end
"""
    execute!(engine::VSAEngine, stmt::SelectCountStmt)

Print the value `vdb_count` returns for the named table.
"""
function execute!(engine::VSAEngine, stmt::SelectCountStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    table = engine.tables[stmt.table_name]
    println(" COUNT(*) = $(vdb_count(table))")
end
| # --- Execute SQL String --- | |
"""
    sql!(engine::VSAEngine, query::AbstractString)

Parse and execute one SQL statement, then print the execution time in
milliseconds.

Any string type is accepted, including the `SubString` returned by `strip`.
Parse and execution errors are caught and printed as ` ERROR: <message>`
using the exception's own message. An `InterruptException` (Ctrl-C) is
rethrown so a running query can still be aborted.
"""
function sql!(engine::VSAEngine, query::AbstractString)
    try
        # parse_sql is declared for String, so normalise the input first.
        stmt = parse_sql(String(query))
        elapsed = @elapsed execute!(engine, stmt)
        @printf(" (%.3f ms)\n", elapsed * 1000)
    catch e
        e isa InterruptException && rethrow()
        println(" ERROR: ", sprint(showerror, e))
    end
    return nothing
end
| # --- Interactive REPL --- | |
"""
    repl(engine::VSAEngine)

Run an interactive prompt on standard input: each non-empty line is executed
through `sql!`. The loop ends on `exit` or `quit` (any letter case) or when
standard input reaches end-of-file.
"""
function repl(engine::VSAEngine)
    println("VSA Vector Database REPL")
    println("Type SQL commands. Type 'exit' to quit.\n")
    while true
        print("vsa> ")
        flush(stdout)
        # At end-of-file readline() keeps returning "", which would otherwise
        # make this loop spin forever on the empty-line `continue` below.
        if eof(stdin)
            println()
            break
        end
        line = readline()
        # strip returns a SubString; convert so the call matches sql!'s
        # String-typed signature.
        command = String(strip(line))
        isempty(command) && continue
        lowercase(command) in ("exit", "quit") && break
        sql!(engine, command)
        println()
    end
    println("Goodbye.")
end