# Atomic-VSA / src / vsa_sql.jl
# marshad180's picture
# Update Atomic VSA deployment
# fa6bd30 verified
using Printf
# ==============================================================================
# VSA SQL PARSER + REPL
# Real query language for the Atomic Vector Database
# Not a wrapper — an actual parser that tokenizes, parses, and executes
# ==============================================================================
# --- Token Types ---
# Lexical categories emitted by the tokenizer. Declaration order fixes the
# underlying integer values (T_KEYWORD == 0 through T_EOF == 10).
@enum TokenType begin
    T_KEYWORD    # reserved words: CREATE, SELECT, INSERT, DELETE, ...
    T_IDENT      # table / column names
    T_NUMBER     # numeric literals
    T_STRING     # single-quoted string literals
    T_OPERATOR   # comparison operators: =, >, <, >=, <=
    T_COMMA      # ,
    T_LPAREN     # (
    T_RPAREN     # )
    T_STAR       # *
    T_SEMICOLON  # ;
    T_EOF        # end-of-input sentinel
end
"""
    Token(type, value)

One lexical unit: its category (`type`) and the text it carries (`value`).
"""
struct Token
    type::TokenType
    value::String
end
# --- Lexer ---
# Reserved words, stored upper-case. The tokenizer upper-cases each word before
# looking it up here, so keyword matching is case-insensitive.
const KEYWORDS = Set{String}([
    # statements
    "CREATE", "INSERT", "SELECT", "DELETE", "VACUUM",
    "EXPLAIN", "SHOW", "DESCRIBE", "DROP",
    # clauses / connectives
    "TABLE", "TABLES", "INTO", "VALUES", "FROM", "WHERE",
    "SIMILAR", "TO", "LIMIT", "AND", "INDEX", "ON",
    # column encoder types
    "THERMO", "CAT", "ORD",
])
# Token for a character that is always a complete token by itself, or `nothing`
# when `c` is not such a character.
function _fixed_token(c::AbstractChar)
    c == '=' && return Token(T_OPERATOR, "=")
    c == ',' && return Token(T_COMMA, ",")
    c == '(' && return Token(T_LPAREN, "(")
    c == ')' && return Token(T_RPAREN, ")")
    c == '*' && return Token(T_STAR, "*")
    c == ';' && return Token(T_SEMICOLON, ";")
    return nothing
end

"""
    tokenize(sql::AbstractString) -> Vector{Token}

Split `sql` into tokens. The result always ends with a `T_EOF` token.

- Words found in `KEYWORDS` (compared upper-cased) become `T_KEYWORD` tokens whose
  value is the upper-cased word; all other words become `T_IDENT` with the
  original spelling.
- Single-quoted text becomes `T_STRING` without the quotes. There is no escape
  syntax for a quote inside a string.
- A number is an optional leading `-` followed by digits and dots.
- Characters that start no token are skipped.

# Throws
- `ErrorException` when a string literal is opened but never closed.
"""
function tokenize(sql::AbstractString)
    # Work on a Vector{Char}: indexing a String by position (`s[i]`, `s[i+1]`)
    # is byte-based and throws StringIndexError on multi-byte characters.
    chars = collect(strip(sql))
    n = length(chars)
    tokens = Token[]
    i = 1
    while i <= n
        c = chars[i]
        if isspace(c)
            i += 1
            continue
        end
        fixed = _fixed_token(c)
        if fixed !== nothing
            push!(tokens, fixed)
            i += 1
        elseif c == '>' || c == '<'
            # Two-character forms (>=, <=) take precedence over > and <.
            if i < n && chars[i + 1] == '='
                push!(tokens, Token(T_OPERATOR, string(c, '=')))
                i += 2
            else
                push!(tokens, Token(T_OPERATOR, string(c)))
                i += 1
            end
        elseif c == '\''
            closing = findnext(==('\''), chars, i + 1)
            closing === nothing &&
                error("Unterminated string literal starting at character $i")
            push!(tokens, Token(T_STRING, String(chars[(i + 1):(closing - 1)])))
            i = closing + 1
        elseif isdigit(c) || (c == '-' && i < n && isdigit(chars[i + 1]))
            j = c == '-' ? i + 1 : i
            while j <= n && (isdigit(chars[j]) || chars[j] == '.')
                j += 1
            end
            push!(tokens, Token(T_NUMBER, String(chars[i:(j - 1)])))
            i = j
        elseif isletter(c) || c == '_'
            j = i
            while j <= n && (isletter(chars[j]) || isdigit(chars[j]) || chars[j] == '_')
                j += 1
            end
            word = String(chars[i:(j - 1)])
            upper = uppercase(word)
            if upper in KEYWORDS
                push!(tokens, Token(T_KEYWORD, upper))
            else
                push!(tokens, Token(T_IDENT, word))
            end
            i = j
        else
            # Unknown characters are skipped (existing best-effort behaviour).
            # NOTE(review): this turns `!=` into `=`; consider rejecting instead.
            i += 1
        end
    end
    push!(tokens, Token(T_EOF, ""))
    return tokens
end
# --- AST Nodes ---
"Supertype of every parsed SQL statement."
abstract type SQLStatement end

"`CREATE TABLE name (col TYPE(params...), ...)`; each column is `(name, type, params)`."
struct CreateTableStmt <: SQLStatement
    table_name::String
    columns::Vector{Tuple{String, String, Vector{String}}}
end

"`INSERT INTO name VALUES (...)`; `values` holds the raw token text in order."
struct InsertStmt <: SQLStatement
    table_name::String
    values::Vector{String}
end

"""
A `SELECT` over one table.

The three `where_*` fields describe a single `field op value` predicate.
`is_similar` marks a `SELECT SIMILAR TO <id>` query, in which case `similar_id`
holds the reference id. `limit` caps the number of rows.
"""
struct SelectStmt <: SQLStatement
    table_name::String
    where_field::String
    where_op::String
    where_value::String
    limit::Int
    is_similar::Bool
    similar_id::String
end

"`DELETE FROM name WHERE ... = id`."
struct DeleteStmt <: SQLStatement
    table_name::String
    id::String
end

"`VACUUM name`."
struct VacuumStmt <: SQLStatement
    table_name::String
end

"`EXPLAIN <statement>`; wraps the statement being explained."
struct ExplainStmt <: SQLStatement
    inner::SQLStatement
end

"`SHOW TABLES`."
struct ShowTablesStmt <: SQLStatement end

"`DESCRIBE name`."
struct DescribeStmt <: SQLStatement
    table_name::String
end

"`DROP TABLE name`."
struct DropTableStmt <: SQLStatement
    table_name::String
end

"`SELECT COUNT(*) FROM name`."
struct SelectCountStmt <: SQLStatement
    table_name::String
end
# --- Parser ---
"""
    Parser(tokens, pos)

Cursor over a token stream. `pos` is the 1-based index of the next token to read.
"""
mutable struct Parser
    tokens::Vector{Token}
    pos::Int
end
"""
    peek(p::Parser) -> Token

Return the token at the cursor without consuming it. Once the cursor has moved
past the last token, a synthetic `T_EOF` token is returned instead.
"""
function peek(p::Parser)
    if p.pos > length(p.tokens)
        return Token(T_EOF, "")
    end
    return p.tokens[p.pos]
end
"""
    advance!(p::Parser) -> Token

Consume and return the token at the cursor, moving the cursor one step forward.
"""
function advance!(p::Parser)
    consumed = peek(p)
    p.pos = p.pos + 1
    return consumed
end
"""
    expect!(p::Parser, type::TokenType) -> Token

Consume the next token and return it when its category is `type`; otherwise
raise an error. The token is consumed in both cases.
"""
function expect!(p::Parser, type::TokenType)
    tok = advance!(p)
    if tok.type == type
        return tok
    end
    error("Expected $(type), got $(tok.type) '$(tok.value)'")
end
"""
    expect_keyword!(p::Parser, kw::String) -> Token

Consume the next token and return it when it is the keyword `kw`; otherwise
raise an error. The token is consumed in both cases.
"""
function expect_keyword!(p::Parser, kw::String)
    tok = advance!(p)
    is_wanted = tok.type == T_KEYWORD && tok.value == kw
    is_wanted || error("Expected keyword '$kw', got '$(tok.value)'")
    return tok
end
# Dispatch on the leading keyword and parse exactly one statement.
function _parse_statement(p::Parser)
    head = peek(p)
    if head.type == T_KEYWORD
        kw = head.value
        if kw == "CREATE"
            return parse_create(p)
        elseif kw == "INSERT"
            return parse_insert(p)
        elseif kw == "SELECT"
            return parse_select(p)
        elseif kw == "DELETE"
            return parse_delete(p)
        elseif kw == "VACUUM"
            return parse_vacuum(p)
        elseif kw == "EXPLAIN"
            advance!(p)
            return ExplainStmt(parse_sql_from(p))
        elseif kw == "SHOW"
            advance!(p)
            expect_keyword!(p, "TABLES")
            return ShowTablesStmt()
        elseif kw == "DESCRIBE"
            advance!(p)
            return DescribeStmt(expect!(p, T_IDENT).value)
        elseif kw == "DROP"
            advance!(p)
            expect_keyword!(p, "TABLE")
            return DropTableStmt(expect!(p, T_IDENT).value)
        end
    end
    error("Unknown statement starting with '$(head.value)'")
end

# Accept one optional `;` and then require end of input.
function _expect_statement_end!(p::Parser)
    peek(p).type == T_SEMICOLON && advance!(p)
    extra = peek(p)
    extra.type == T_EOF ||
        error("Unexpected '$(extra.value)' after end of statement")
    return nothing
end

"""
    parse_sql(sql::AbstractString) -> SQLStatement

Tokenize `sql` and parse it as a single statement, optionally terminated by `;`.

# Throws
- `ErrorException` when the statement does not start with a supported keyword,
  when a sub-parser rejects it, or when tokens remain after the statement.
  Left-over input used to be dropped silently, so `WHERE a = 1 AND b = 2` ran
  as `WHERE a = 1`.
"""
function parse_sql(sql::AbstractString)
    # String(...) so SubString input (e.g. the result of `strip`) is accepted.
    p = Parser(tokenize(String(sql)), 1)
    stmt = _parse_statement(p)
    _expect_statement_end!(p)
    return stmt
end
"""
    parse_sql_from(p::Parser) -> SelectStmt or SelectCountStmt

Parse the statement that follows `EXPLAIN`. Only `SELECT` is accepted; anything
else raises an error.
"""
function parse_sql_from(p::Parser)
    peek(p).value == "SELECT" || error("Expected SELECT after EXPLAIN")
    return parse_select(p)
end
"""
    parse_create(p::Parser) -> CreateTableStmt

Parse `CREATE TABLE name (col TYPE [(param, ...)], ...)`.

The column type is taken from whatever token follows the column name; it is not
validated here. Commas between columns and between parameters are optional.
"""
function parse_create(p::Parser)
    expect_keyword!(p, "CREATE")
    expect_keyword!(p, "TABLE")
    table_tok = expect!(p, T_IDENT)
    expect!(p, T_LPAREN)

    # A list ends at `)`; EOF also stops the loop so the closing expect! reports it.
    at_list_end(parser) = peek(parser).type in (T_RPAREN, T_EOF)

    columns = Tuple{String, String, Vector{String}}[]
    while !at_list_end(p)
        name_tok = expect!(p, T_IDENT)
        type_tok = advance!(p)  # THERMO, CAT, ORD
        args = String[]
        if peek(p).type == T_LPAREN
            advance!(p)
            while !at_list_end(p)
                arg = advance!(p)
                arg.type == T_COMMA || push!(args, arg.value)
            end
            expect!(p, T_RPAREN)
        end
        push!(columns, (name_tok.value, type_tok.value, args))
        peek(p).type == T_COMMA && advance!(p)
    end
    expect!(p, T_RPAREN)
    return CreateTableStmt(table_tok.value, columns)
end
"""
    parse_insert(p::Parser) -> InsertStmt

Parse `INSERT INTO name VALUES (v1, v2, ...)`. Every non-comma token inside the
parentheses contributes its text to the value list.
"""
function parse_insert(p::Parser)
    expect_keyword!(p, "INSERT")
    expect_keyword!(p, "INTO")
    table_tok = expect!(p, T_IDENT)
    expect_keyword!(p, "VALUES")
    expect!(p, T_LPAREN)

    row = String[]
    while true
        kind = peek(p).type
        (kind == T_RPAREN || kind == T_EOF) && break
        tok = advance!(p)
        tok.type == T_COMMA && continue
        push!(row, tok.value)
    end

    expect!(p, T_RPAREN)
    return InsertStmt(table_tok.value, row)
end
# Consume one value-like token (number, string, identifier or keyword text).
# `what` names the expected item for the error message.
function _select_value!(p::Parser, what::AbstractString)
    tok = advance!(p)
    tok.type in (T_NUMBER, T_STRING, T_IDENT, T_KEYWORD) ||
        error("Expected $what, got '$(tok.value)'")
    return tok
end

# Parse an optional `LIMIT n` clause; return `default` when it is absent.
function _select_limit!(p::Parser, default::Int)
    tok = peek(p)
    (tok.type == T_KEYWORD && tok.value == "LIMIT") || return default
    advance!(p)
    num = expect!(p, T_NUMBER)
    n = tryparse(Int, num.value)
    (n === nothing || n < 0) &&
        error("LIMIT must be a non-negative integer, got '$(num.value)'")
    return n
end

"""
    parse_select(p::Parser) -> SelectStmt or SelectCountStmt

Parse one of the supported `SELECT` forms:

- `SELECT COUNT(*) FROM table`
- `SELECT SIMILAR TO <id> FROM table [LIMIT n]`
- `SELECT [*] FROM table [WHERE field op value] [LIMIT n]`

`LIMIT` defaults to 10.

# Throws
- `ErrorException` on malformed input, including a missing `SIMILAR TO` id or
  `WHERE` value (previously accepted as an empty string) and a negative or
  non-integer `LIMIT`.
"""
function parse_select(p::Parser)
    expect_keyword!(p, "SELECT")
    head = peek(p)

    # COUNT is not a reserved word, so it arrives as an identifier.
    if head.type == T_IDENT && uppercase(head.value) == "COUNT"
        advance!(p)
        expect!(p, T_LPAREN)
        expect!(p, T_STAR)
        expect!(p, T_RPAREN)
        expect_keyword!(p, "FROM")
        return SelectCountStmt(expect!(p, T_IDENT).value)
    end

    if head.type == T_KEYWORD && head.value == "SIMILAR"
        advance!(p)
        expect_keyword!(p, "TO")
        id_tok = _select_value!(p, "a record id after SIMILAR TO")
        expect_keyword!(p, "FROM")
        table = expect!(p, T_IDENT)
        lim = _select_limit!(p, 10)
        return SelectStmt(table.value, "", "", "", lim, true, id_tok.value)
    end

    # `SELECT * FROM ...` and `SELECT FROM ...` are both accepted.
    peek(p).type == T_STAR && advance!(p)
    expect_keyword!(p, "FROM")
    table = expect!(p, T_IDENT)

    where_field = ""
    where_op = ""
    where_value = ""
    next = peek(p)
    if next.type == T_KEYWORD && next.value == "WHERE"
        advance!(p)
        where_field = expect!(p, T_IDENT).value
        where_op = expect!(p, T_OPERATOR).value
        where_value = _select_value!(p, "a value after '$where_op'").value
    end

    lim = _select_limit!(p, 10)
    return SelectStmt(table.value, where_field, where_op, where_value, lim, false, "")
end
"""
    parse_delete(p::Parser) -> DeleteStmt

Parse `DELETE FROM table WHERE <field> = <id>`.

Only equality is supported. The field name is required syntactically but is not
interpreted: the value is always used as the record id.

# Throws
- `ErrorException` on malformed input, when the operator is not `=` (previously
  `WHERE id > 5` deleted record "5"), or when no id follows the operator.
"""
function parse_delete(p::Parser)
    expect_keyword!(p, "DELETE")
    expect_keyword!(p, "FROM")
    table = expect!(p, T_IDENT)
    expect_keyword!(p, "WHERE")
    expect!(p, T_IDENT)  # field name; not interpreted
    op = expect!(p, T_OPERATOR)
    op.value == "=" ||
        error("DELETE only supports '=' in WHERE, got '$(op.value)'")
    val = advance!(p)
    val.type in (T_NUMBER, T_STRING, T_IDENT, T_KEYWORD) ||
        error("Expected a record id after '=', got '$(val.value)'")
    return DeleteStmt(table.value, val.value)
end
"""
    parse_vacuum(p::Parser) -> VacuumStmt

Parse `VACUUM table`.
"""
function parse_vacuum(p::Parser)
    expect_keyword!(p, "VACUUM")
    return VacuumStmt(expect!(p, T_IDENT).value)
end
# --- Executor ---
"""
    VSAEngine(reg, tables, dim)

State shared by all statements of one session: the registry handed to encoders
and to `create_table`, the tables keyed by name, and the `dim` value passed to
`create_table` for every new table.
"""
mutable struct VSAEngine
    reg::VSARegistry
    tables::Dict{String, VDBTable}
    dim::Int
end
"""
    VSAEngine(dim::Int=2048)

Create an engine with a fresh registry and no tables.
"""
VSAEngine(dim::Int=2048) = VSAEngine(VSARegistry(), Dict{String, VDBTable}(), dim)
"""
    execute!(engine::VSAEngine, stmt::CreateTableStmt)

Build one encoder per declared column, create the table and register it in
`engine.tables` under its name (replacing any entry with the same name).

`THERMO` columns take up to three parameters (min, max, levels) with defaults
0.0, 100.0 and 100. `CAT` columns pass their parameters to the categorical
encoder. Any other type gets a categorical encoder with an empty parameter list.
"""
function execute!(engine::VSAEngine, stmt::CreateTableStmt)
    schema = Tuple{String, VSAEncoder}[]
    for (col, kind, args) in stmt.columns
        # i-th parameter parsed as T, or `fallback` when it was not supplied.
        arg(i, T, fallback) = length(args) >= i ? parse(T, args[i]) : fallback
        encoder = if kind == "THERMO"
            lo = arg(1, Float64, 0.0)
            hi = arg(2, Float64, 100.0)
            nlevels = arg(3, Int, 100)
            ThermometerEncoder(engine.reg, col, lo, hi; levels=nlevels)
        elseif kind == "CAT"
            CategoricalEncoder(engine.reg, col, args)
        else
            CategoricalEncoder(engine.reg, col, String[])
        end
        push!(schema, (col, encoder))
    end
    engine.tables[stmt.table_name] =
        create_table(engine.reg, stmt.table_name, engine.dim, schema)
    println(" OK. Table '$(stmt.table_name)' created with $(length(schema)) columns.")
end
"""
    execute!(engine::VSAEngine, stmt::InsertStmt)

Insert one record. The first value is the record id; the remaining values are
matched to the table's columns in order. A value that parses as `Float64` is
stored as a number, anything else as the raw text. Columns without a supplied
value are left out, and surplus values are ignored.
"""
function execute!(engine::VSAEngine, stmt::InsertStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    table = engine.tables[stmt.table_name]
    isempty(stmt.values) && return println(" ERROR: Need at least ID value.")

    id = stmt.values[1]
    fields = Dict{String, Any}()
    # zip stops at the shorter side, which pairs each column with its value
    # (skipping the leading id) and drops whatever has no counterpart.
    for (col, raw) in zip(table.columns, Iterators.drop(stmt.values, 1))
        number = tryparse(Float64, raw)
        fields[col.name] = number === nothing ? raw : number
    end

    vdb_insert!(table, id, fields)
    println(" OK. Inserted '$(id)' into '$(stmt.table_name)'.")
end
# Translate the SQL comparison text into the operator symbol handed to the
# query layer. `=` maps to `==`; the others keep their spelling.
function _select_operator(op::AbstractString)
    op == "=" && return :(==)
    op in (">", "<", ">=", "<=") && return Symbol(op)
    error("Unsupported WHERE operator '$op'")
end

# Print the plan, the (id, score) rows and the row count of a query result.
function _print_select_result(result)
    println(" Plan: $(result.plan)")
    println(" Results:")
    for (id, score) in zip(result.ids, result.scores)
        @printf(" %-10s score=%.4f\n", id, score)
    end
    @printf(" %d rows returned.\n", length(result.ids))
    return nothing
end

"""
    execute!(engine::VSAEngine, stmt::SelectStmt)

Run a `SELECT` and print the outcome.

- `SIMILAR TO` queries print the plan and scored ids from `vdb_select_similar`.
- Without a `WHERE` clause, up to `stmt.limit` record ids are listed.
- With a `WHERE` clause, the predicate is passed to `vdb_select`. A value that
  parses as `Float64` is compared as a number, anything else as text.

`>=` and `<=` are forwarded as such, the same way the EXPLAIN path forwards
them. They previously fell through to `==` and silently returned equality
matches. NOTE(review): confirm `vdb_select` implements `>=` and `<=`.
"""
function execute!(engine::VSAEngine, stmt::SelectStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    table === nothing && return println(" ERROR: Table '$(stmt.table_name)' not found.")

    if stmt.is_similar
        return _print_select_result(
            vdb_select_similar(table, stmt.similar_id; top_k=stmt.limit))
    end

    if isempty(stmt.where_field)
        # SELECT * FROM table (show ids only). Clamp so that a negative limit
        # reports "showing 0" instead of a negative count.
        n = clamp(stmt.limit, 0, length(table.record_ids))
        println(" $(length(table.record_ids)) total records (showing $n):")
        for i in 1:n
            println(" $(table.record_ids[i])")
        end
        return
    end

    op = _select_operator(stmt.where_op)
    number = tryparse(Float64, stmt.where_value)
    value = number !== nothing ? number : stmt.where_value
    return _print_select_result(
        vdb_select(table, stmt.where_field, op, value; top_k=stmt.limit))
end
"""
    execute!(engine::VSAEngine, stmt::DeleteStmt)

Delete the record with id `stmt.id` from the named table and report whether
`vdb_delete!` found it.
"""
function execute!(engine::VSAEngine, stmt::DeleteStmt)
    table = get(engine.tables, stmt.table_name, nothing)
    if table === nothing
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    if vdb_delete!(table, stmt.id)
        println(" OK. Deleted '$(stmt.id)'. $(length(table.records)) records remain.")
    else
        println(" ERROR: '$(stmt.id)' not found.")
    end
end
"""
    execute!(engine::VSAEngine, stmt::VacuumStmt)

Call `vdb_vacuum!` on the named table and print the count it returns.
"""
function execute!(engine::VSAEngine, stmt::VacuumStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    n = vdb_vacuum!(engine.tables[stmt.table_name])
    println(" OK. WAL compacted ($n entries). Indices rebuilt.")
end
"""
    execute!(engine::VSAEngine, stmt::ExplainStmt)

Explain a `SELECT ... WHERE` query by handing its predicate to `vdb_explain`.
Any other wrapped statement only prints a notice.
"""
function execute!(engine::VSAEngine, stmt::ExplainStmt)
    query = stmt.inner
    if !(query isa SelectStmt) || isempty(query.where_field)
        println(" EXPLAIN only supports SELECT...WHERE queries.")
        return
    end

    table = get(engine.tables, query.table_name, nothing)
    table === nothing && return println(" ERROR: Table not found.")

    number = tryparse(Float64, query.where_value)
    value = number === nothing ? query.where_value : number
    # `=` becomes `==`; every other operator keeps its spelling as a Symbol.
    op = query.where_op == "=" ? :(==) : Symbol(query.where_op)
    return vdb_explain(table, query.where_field, op, value)
end
"""
    execute!(engine::VSAEngine, stmt::ShowTablesStmt)

Print one line per table with its record and column counts, or a notice when
the engine has no tables.
"""
function execute!(engine::VSAEngine, stmt::ShowTablesStmt)
    isempty(engine.tables) && return println(" No tables.")
    for (name, table) in engine.tables
        @printf(" %-20s %d records, %d columns\n", name, length(table.records), length(table.columns))
    end
end
"""
    execute!(engine::VSAEngine, stmt::DescribeStmt)

Call `vdb_stats` on the named table, or report that the table does not exist.
"""
function execute!(engine::VSAEngine, stmt::DescribeStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    return vdb_stats(engine.tables[stmt.table_name])
end
"""
    execute!(engine::VSAEngine, stmt::DropTableStmt)

Remove the named table from `engine.tables` and report how many records it held.
"""
function execute!(engine::VSAEngine, stmt::DropTableStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    # Read the record count before the entry disappears.
    n = length(engine.tables[stmt.table_name].records)
    delete!(engine.tables, stmt.table_name)
    println(" OK. Table '$(stmt.table_name)' dropped ($n records removed).")
end
"""
    execute!(engine::VSAEngine, stmt::SelectCountStmt)

Print the value returned by `vdb_count` for the named table.
"""
function execute!(engine::VSAEngine, stmt::SelectCountStmt)
    if !haskey(engine.tables, stmt.table_name)
        return println(" ERROR: Table '$(stmt.table_name)' not found.")
    end
    println(" COUNT(*) = $(vdb_count(engine.tables[stmt.table_name]))")
end
# --- Execute SQL String ---
"""
    sql!(engine::VSAEngine, query::AbstractString)

Parse and execute one SQL statement, then print the execution time in
milliseconds. Parse and execution errors are caught and printed as a single
` ERROR: ...` line rather than propagated.

Any `AbstractString` is accepted, so the `SubString` produced by `strip` can be
passed directly. An `InterruptException` (Ctrl-C) is rethrown, not swallowed.
"""
function sql!(engine::VSAEngine, query::AbstractString)
    try
        stmt = parse_sql(String(query))
        t = @elapsed execute!(engine, stmt)
        @printf(" (%.3f ms)\n", t * 1000)
    catch e
        e isa InterruptException && rethrow()
        # showerror gives the human-readable message, not the struct repr
        # (`ErrorException("...")`) that printing the exception object yields.
        println(" ERROR: ", sprint(showerror, e))
    end
    return nothing
end
# --- Interactive REPL ---
"""
    repl(engine::VSAEngine)

Interactive prompt: read one statement per line from standard input and run it
with `sql!`. Blank lines are ignored. The loop ends on `exit` or `quit` (any
letter case) or when standard input reaches end-of-file.
"""
function repl(engine::VSAEngine)
    println("VSA Vector Database REPL")
    println("Type SQL commands. Type 'exit' to quit.\n")
    while true
        print("vsa> ")
        flush(stdout)
        # readline() returns "" at end-of-file, which would otherwise be taken
        # for a blank line and make the loop spin forever.
        if eof(stdin)
            println()
            break
        end
        # strip returns a SubString; convert so a String is handed to sql!.
        stripped = String(strip(readline()))
        isempty(stripped) && continue
        lowercase(stripped) in ("exit", "quit") && break
        sql!(engine, stripped)
        println()
    end
    println("Goodbye.")
end