Spaces:
Runtime error
Runtime error
File size: 2,578 Bytes
#pragma once
#include "wayy_db/types.hpp"
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
namespace wayy_db {
/// Variable-length string column using an Arrow-style layout.
///
/// Three buffers back the column:
///   - offsets_  : int64_t[N+1], byte offsets into data_
///   - data_     : uint8_t[],    concatenated UTF-8 bytes
///   - validity_ : uint8_t[],    one bit per row (1 = valid, 0 = null)
///
/// The string at row i is data_[offsets_[i] .. offsets_[i+1]].
class StringColumn {
public:
    /// Constructs an empty string column.
    /// @param name  Column name; defaults to the empty string.
    explicit StringColumn(std::string name = "");

    // Move-only type: copying is disabled, moving uses the compiler defaults.
    StringColumn(const StringColumn&) = delete;
    StringColumn& operator=(const StringColumn&) = delete;
    StringColumn(StringColumn&&) = default;
    StringColumn& operator=(StringColumn&&) = default;

    // ---- Column metadata ---------------------------------------------------

    /// @return The column name.
    const std::string& name() const { return name_; }

    /// @return Always DType::String.
    DType dtype() const { return DType::String; }

    /// @return Row count: one less than the number of stored offsets, or 0
    ///         when no offsets are stored at all.
    size_t size() const {
        if (offsets_.empty()) {
            return 0;
        }
        return offsets_.size() - 1;
    }

    /// @return Number of bytes currently held in the data buffer.
    size_t data_bytes() const { return data_.size(); }

    // ---- Element access ----------------------------------------------------

    /// Reads the string at the given row.
    /// NOTE(review): bounds handling and the result for null rows are decided
    /// in the implementation file, which is not visible here.
    std::string_view get(size_t row) const;

    /// Appends a new string as the last row.
    void append(std::string_view val);

    /// Appends a null value as the last row.
    void append_null();

    /// Overwrites the string at a given row.
    /// When the new string fits in the existing slot it is written in place;
    /// otherwise the old slot is wasted and the new value is appended to data_.
    void set(size_t row, std::string_view val);

    // ---- Validity bitmap ---------------------------------------------------

    /// @return The current value of the has_validity_ flag.
    bool has_validity() const { return has_validity_; }

    /// Queries the validity of a row.
    bool is_valid(size_t row) const;

    /// Sets the validity of a row.
    void set_valid(size_t row, bool valid);

    /// NOTE(review): presumably the number of rows marked valid; confirm
    /// against the implementation.
    size_t count_valid() const;

    // ---- Persistence -------------------------------------------------------

    /// Saves the column under `dir_path` using `col_name`.
    /// NOTE(review): the on-disk format is defined in the implementation file.
    void save(const std::string& dir_path, const std::string& col_name) const;

    /// Loads a column previously stored under `dir_path` with `col_name`.
    static StringColumn load(const std::string& dir_path, const std::string& col_name);

    // ---- Direct buffer access for bulk operations --------------------------

    /// @return Read-only reference to the offsets buffer.
    const std::vector<int64_t>& offsets() const { return offsets_; }

    /// @return Read-only reference to the byte data buffer.
    const std::vector<uint8_t>& data_buf() const { return data_; }

    /// @return Read-only reference to the validity bitmap buffer.
    const std::vector<uint8_t>& validity_bitmap() const { return validity_; }

    /// Collects all strings into a vector of owned copies.
    std::vector<std::string> to_vector() const;

private:
    /// NOTE(review): presumably prepares validity_ before it is first used;
    /// confirm in the implementation file.
    void ensure_validity();

    std::string name_;
    std::vector<int64_t> offsets_;   // N+1 offsets
    std::vector<uint8_t> data_;      // Concatenated UTF-8 bytes
    std::vector<uint8_t> validity_;  // Null bitmap
    bool has_validity_{false};
};
} // namespace wayy_db
|