#include "wayy_db/table.hpp" #include #include #include #include namespace fs = std::filesystem; namespace wayy_db { Table::Table(std::string name) : name_(std::move(name)) {} void Table::add_column(Column column) { if (columns_.empty()) { num_rows_ = column.size(); } else if (column.size() != num_rows_) { throw InvalidOperation( "Column size mismatch: expected " + std::to_string(num_rows_) + ", got " + std::to_string(column.size())); } const std::string& col_name = column.name(); if (column_index_.count(col_name)) { throw InvalidOperation("Column already exists: " + col_name); } column_index_[col_name] = columns_.size(); columns_.push_back(std::move(column)); } void Table::add_column(const std::string& name, DType dtype, void* data, size_t size) { add_column(Column(name, dtype, data, size, true)); } bool Table::has_column(const std::string& name) const { return column_index_.count(name) > 0; } Column& Table::column(const std::string& name) { auto it = column_index_.find(name); if (it == column_index_.end()) { throw ColumnNotFound(name); } return columns_[it->second]; } const Column& Table::column(const std::string& name) const { auto it = column_index_.find(name); if (it == column_index_.end()) { throw ColumnNotFound(name); } return columns_[it->second]; } std::vector Table::column_names() const { std::vector names; names.reserve(columns_.size()); for (const auto& col : columns_) { names.push_back(col.name()); } return names; } void Table::set_sorted_by(const std::string& col) { if (!has_column(col)) { throw ColumnNotFound(col); } sorted_by_ = col; } void Table::save(const std::string& dir_path) const { fs::create_directories(dir_path); // Write metadata write_metadata(dir_path); // Write each column for (const auto& col : columns_) { std::string col_path = dir_path + "/" + col.name() + ".col"; std::ofstream file(col_path, std::ios::binary); if (!file) { throw WayyException("Failed to create column file: " + col_path); } // Write header ColumnHeader header{}; header.magic = WAYY_MAGIC; header.version = WAYY_VERSION; header.dtype = col.dtype(); header.row_count = col.size(); header.compression = 0; header.data_offset = sizeof(ColumnHeader); file.write(reinterpret_cast(&header), sizeof(header)); // Write data file.write(static_cast(col.data()), col.byte_size()); } } void Table::write_metadata(const std::string& dir_path) const { std::string meta_path = dir_path + "/_meta.json"; std::ofstream file(meta_path); if (!file) { throw WayyException("Failed to create metadata file: " + meta_path); } // Simple JSON serialization (no external dependency) file << "{\n"; file << " \"version\": " << WAYY_VERSION << ",\n"; file << " \"name\": \"" << name_ << "\",\n"; file << " \"num_rows\": " << num_rows_ << ",\n"; if (sorted_by_) { file << " \"sorted_by\": \"" << *sorted_by_ << "\",\n"; } else { file << " \"sorted_by\": null,\n"; } file << " \"columns\": [\n"; for (size_t i = 0; i < columns_.size(); ++i) { const auto& col = columns_[i]; file << " {\"name\": \"" << col.name() << "\", \"dtype\": \"" << dtype_to_string(col.dtype()) << "\"}"; if (i + 1 < columns_.size()) file << ","; file << "\n"; } file << " ]\n"; file << "}\n"; } Table Table::load(const std::string& dir_path) { auto [name, num_rows, sorted_by, col_info] = read_metadata(dir_path); Table table(name); for (const auto& [col_name, dtype] : col_info) { std::string col_path = dir_path + "/" + col_name + ".col"; std::ifstream file(col_path, std::ios::binary); if (!file) { throw WayyException("Failed to open column file: " + col_path); } // Read header ColumnHeader header; file.read(reinterpret_cast(&header), sizeof(header)); if (header.magic != WAYY_MAGIC) { throw WayyException("Invalid column file magic: " + col_path); } // Read data size_t byte_size = header.row_count * dtype_size(header.dtype); std::vector data(byte_size); file.read(reinterpret_cast(data.data()), byte_size); table.add_column(Column(col_name, header.dtype, std::move(data))); } if (sorted_by) { table.set_sorted_by(*sorted_by); } return table; } Table Table::mmap(const std::string& dir_path) { auto [name, num_rows, sorted_by, col_info] = read_metadata(dir_path); Table table(name); for (const auto& [col_name, dtype] : col_info) { std::string col_path = dir_path + "/" + col_name + ".col"; MmapFile mmap_file(col_path, MmapFile::Mode::ReadOnly); // Validate header auto* header = static_cast(mmap_file.data()); if (header->magic != WAYY_MAGIC) { throw WayyException("Invalid column file magic: " + col_path); } // Create column pointing to mmap'd data void* data_ptr = static_cast(mmap_file.data()) + header->data_offset; table.add_column(Column(col_name, header->dtype, data_ptr, header->row_count, false)); // Keep mmap file alive table.mmap_files_.push_back(std::move(mmap_file)); } if (sorted_by) { table.set_sorted_by(*sorted_by); } return table; } std::tuple, std::vector>> Table::read_metadata(const std::string& dir_path) { std::string meta_path = dir_path + "/_meta.json"; std::ifstream file(meta_path); if (!file) { throw WayyException("Failed to open metadata file: " + meta_path); } // Simple JSON parsing (minimal implementation) std::stringstream buffer; buffer << file.rdbuf(); std::string json = buffer.str(); // Extract fields using simple string parsing // (In production, use a proper JSON library) auto extract_string = [&json](const std::string& key) -> std::string { std::string pattern = "\"" + key + "\": \""; auto pos = json.find(pattern); if (pos == std::string::npos) return ""; pos += pattern.size(); auto end = json.find("\"", pos); return json.substr(pos, end - pos); }; auto extract_int = [&json](const std::string& key) -> size_t { std::string pattern = "\"" + key + "\": "; auto pos = json.find(pattern); if (pos == std::string::npos) return 0; pos += pattern.size(); return std::stoull(json.substr(pos)); }; std::string name = extract_string("name"); size_t num_rows = extract_int("num_rows"); std::optional sorted_by; std::string sorted_str = extract_string("sorted_by"); if (!sorted_str.empty()) { sorted_by = sorted_str; } // Parse columns array std::vector> columns; auto cols_start = json.find("\"columns\":"); if (cols_start != std::string::npos) { auto arr_start = json.find("[", cols_start); auto arr_end = json.find("]", arr_start); std::string arr = json.substr(arr_start, arr_end - arr_start + 1); size_t pos = 0; while ((pos = arr.find("{", pos)) != std::string::npos) { auto obj_end = arr.find("}", pos); std::string obj = arr.substr(pos, obj_end - pos + 1); // Extract name and dtype from object auto name_pos = obj.find("\"name\": \""); if (name_pos != std::string::npos) { name_pos += 9; auto name_end = obj.find("\"", name_pos); std::string col_name = obj.substr(name_pos, name_end - name_pos); auto dtype_pos = obj.find("\"dtype\": \""); dtype_pos += 10; auto dtype_end = obj.find("\"", dtype_pos); std::string dtype_str = obj.substr(dtype_pos, dtype_end - dtype_pos); columns.emplace_back(col_name, dtype_from_string(dtype_str)); } pos = obj_end + 1; } } return {name, num_rows, sorted_by, columns}; } } // namespace wayy_db