Spaces:

marshad180
/

Atomic-VSA

Sleeping

App Files Files Community

Atomic-VSA / src /vsa_encoding.jl

marshad180

Update Atomic VSA deployment

fa6bd30 verified 21 days ago

raw

history blame contribute delete

5.78 kB

	# ==============================================================================
	# VSA ENCODING LAYER
	# Thermometer, Categorical, and Ordinal Encoders
	# Mirrors: atom_factory.rs (ThermometerEncoder, CategoricalEncoder, OrdinalEncoder)
	# ==============================================================================

	# --- Abstract Encoder ---

	# Common supertype of every encoder in this file (ThermometerEncoder,
	# CategoricalEncoder, OrdinalEncoder). It carries no fields; it exists so that
	# `encode` methods can dispatch on the encoder type and so that containers such
	# as FieldSchema can hold any encoder.
	abstract type VSAEncoder end

	# --- Thermometer Encoder ---
	# Numeric values → cumulative atom superposition
	# Close values share many levels → high similarity
	# Distant values share few levels → low similarity

	# Encoder for numeric values. `encode` clamps a value to [min_val, max_val],
	# converts it to a count of active levels, and superposes that many rotated
	# copies of a single "base" element fetched from the registry.
	#
	# Fields are positional for the default constructor, in this order:
	#   reg        - registry handed to `get_element` when encoding
	#   sector     - registry sector name; the convenience constructor builds it
	#                as "thermo_<field_name>"
	#   field_name - identifier of the field this encoder belongs to
	#   min_val    - lower bound used for clamping and normalization
	#   max_val    - upper bound used for clamping and normalization
	#   levels     - number of discretization steps across the range
	struct ThermometerEncoder <: VSAEncoder
	reg::VSARegistry
	sector::String # Registry sector for level atoms
	field_name::String # Field identifier
	min_val::Float64
	max_val::Float64
	levels::Int # Number of discretization levels
	end

	# Convenience constructor: derives the registry sector name from `field_name`
	# ("thermo_" prefix) and converts both bounds to Float64 before delegating to
	# the default six-argument constructor. `levels` defaults to 100.
	function ThermometerEncoder(reg::VSARegistry, field_name::String, min_val, max_val; levels=100)
	    sector = "thermo_$(field_name)"
	    lower = Float64(min_val)
	    upper = Float64(max_val)
	    return ThermometerEncoder(reg, sector, field_name, lower, upper, levels)
	end

	# Encode a numeric `value` as a thermometer-style superposition of length `d`.
	#
	# Steps:
	#   1. Clamp `value` to [enc.min_val, enc.max_val] and normalize to [0, 1].
	#      A non-positive range (max_val <= min_val) normalizes to 0.5.
	#   2. Activate `max(1, ceil(normalized * enc.levels))` levels, so at least one
	#      level is always active.
	#   3. For each active level `i`, add the registry's "base" vector rotated by
	#      `mod(i, d)` positions into the result.
	#
	# Returns `Atom(SingleData(res_vec))`, where `res_vec` is a fresh
	# `Vector{Float32}` of length `d`.
	#
	# NOTE(review): assumes `get_element(...).data` is a SingleData whose `vec` has
	# at least `d` elements; the inner loop runs under @inbounds, so a shorter
	# vector would not be caught. Confirm against `get_element`.
	function encode(enc::ThermometerEncoder, value::Real, d::Int)
	    # Clamp to range
	    v = clamp(Float64(value), enc.min_val, enc.max_val)

	    # Normalize to [0, 1]; a degenerate range maps to the midpoint
	    range_size = enc.max_val - enc.min_val
	    normalized = range_size > 0 ? (v - enc.min_val) / range_size : 0.5

	    # How many levels to activate (thermometer style)
	    num_active = max(1, ceil(Int, normalized * enc.levels))

	    # The only allocation: the accumulator for the bundled result.
	    # (An unused scratch buffer was previously allocated here on every call.)
	    res_vec = zeros(Float32, d)
	    base = get_element(enc.reg, enc.sector, "base", d)

	    # Base vector (SingleData)
	    b_vec = base.data.vec

	    # Accumulate the rotated base vector once per active level
	    for i in 1:num_active
	        s = mod(i, d)
	        if s == 0
	            # Rotation by a multiple of d is the identity, so bundle the
	            # base vector unshifted (bundle! is defined elsewhere)
	            bundle!(res_vec, b_vec)
	        else
	            # Element j of the base lands at index j + s, wrapping past d
	            @inbounds for j in 1:d
	                target_idx = j + s
	                if target_idx > d
	                    target_idx -= d
	                end
	                res_vec[target_idx] += b_vec[j]
	            end
	        end
	    end

	    return Atom(SingleData(res_vec))
	end

	# Predict how similar the thermometer codes of `v1` and `v2` should be.
	#
	# Each value is normalized against [enc.min_val, enc.max_val] (clamped to
	# [0, 1]; a non-positive range yields 0.5) and converted to an active-level
	# count of `max(1, ceil(fraction * enc.levels))`. The result is the smaller
	# count divided by the larger one, returned as a Float32.
	function expected_similarity(enc::ThermometerEncoder, v1::Real, v2::Real)
	    span = enc.max_val - enc.min_val

	    # Normalized position of each value inside the encoder's range
	    fractions = map((v1, v2)) do raw
	        span > 0 ? clamp((Float64(raw) - enc.min_val) / span, 0, 1) : 0.5
	    end

	    # Active-level count per value
	    count1, count2 = map(f -> max(1, ceil(Int, f * enc.levels)), fractions)

	    shared, total = minmax(count1, count2)
	    if total > 0
	        return Float32(shared / total)
	    end
	    return 1.0f0
	end

	# --- Categorical Encoder ---
	# Discrete labels → orthogonal atoms from Registry
	# Each category gets its own stable random atom

	# Encoder for discrete labels. `encode` looks the label up in the registry
	# under this encoder's sector.
	#
	# Fields are positional for the default constructor, in this order:
	#   reg        - registry handed to `get_element` when encoding
	#   sector     - registry sector name; the convenience constructor builds it
	#                as "cat_<field_name>"
	#   field_name - identifier of the field this encoder belongs to
	#   categories - list of category labels; stored here, but not consulted by
	#                the `encode` method in this file
	struct CategoricalEncoder <: VSAEncoder
	reg::VSARegistry
	sector::String
	field_name::String
	categories::Vector{String}
	end

	# Convenience constructor: derives the registry sector name from `field_name`
	# ("cat_" prefix) and delegates to the default four-argument constructor.
	function CategoricalEncoder(reg::VSARegistry, field_name::String, categories::Vector{String})
	    sector = "cat_$(field_name)"
	    return CategoricalEncoder(reg, sector, field_name, categories)
	end

	# Encode a category label by fetching the element named `value` from the
	# encoder's registry sector at dimension `d`. The label is passed through
	# as-is; it is not checked against `enc.categories`.
	function encode(enc::CategoricalEncoder, value::String, d::Int)
	    registry, sector = enc.reg, enc.sector
	    return get_element(registry, sector, value, d)
	end

	# --- Ordinal Encoder ---
	# Ordered discrete values → indexed atoms with progressive similarity

	# Encoder for ordered discrete values. `encode` looks the value up in the
	# registry under this encoder's sector.
	#
	# Fields are positional for the default constructor, in this order:
	#   reg        - registry handed to `get_element` when encoding
	#   sector     - registry sector name; the convenience constructor builds it
	#                as "ord_<field_name>"
	#   field_name - identifier of the field this encoder belongs to
	#   values     - the ordered value labels; stored here, but not consulted by
	#                the `encode` method in this file
	struct OrdinalEncoder <: VSAEncoder
	reg::VSARegistry
	sector::String
	field_name::String
	values::Vector{String}
	end

	# Convenience constructor: derives the registry sector name from `field_name`
	# ("ord_" prefix) and delegates to the default four-argument constructor.
	function OrdinalEncoder(reg::VSARegistry, field_name::String, values::Vector{String})
	    sector = "ord_$(field_name)"
	    return OrdinalEncoder(reg, sector, field_name, values)
	end

	# Encode an ordinal value by fetching the element named `value` from the
	# encoder's registry sector at dimension `d`.
	# NOTE(review): this is the same lookup the categorical encoder performs; the
	# position of `value` within `enc.values` is not used here. Confirm whether
	# order-dependent similarity is expected to come from elsewhere.
	function encode(enc::OrdinalEncoder, value::String, d::Int)
	    registry, sector = enc.reg, enc.sector
	    return get_element(registry, sector, value, d)
	end

	# --- Permutation Helper ---
	# Circular shift of atom vector (the same rotation the Thermometer encoder applies inline to its level atoms)

	# Circularly rotate the contents of `atom` by `shift` positions.
	#
	# Position `i` of the result holds what was at position `i - s` of the input
	# (wrapping around), where `s = mod(shift, length)`. Negative shifts therefore
	# rotate in the opposite direction.
	#
	# - SingleData: returns a new Atom wrapping a freshly allocated
	#   `Vector{Float32}`.
	# - BinaryData: returns a new Atom whose bits are rotated within the first
	#   `dim` bit positions; bit `i` (1-based) lives in chunk `(i - 1) ÷ 64 + 1`
	#   at bit offset `(i - 1) % 64`.
	# - Any other data type, an effective shift of zero, or zero-length data:
	#   the input `atom` itself is returned (not a copy).
	function permute_atom(atom::Atom, shift::Int)
	    if atom.data isa SingleData
	        src = atom.data.vec
	        d = length(src)
	        # Nothing to rotate; also avoids mod(shift, 0), which throws
	        d == 0 && return atom
	        s = mod(shift, d)
	        s == 0 && return atom

	        # Gather form of the rotation: each output slot pulls from i - s
	        new_vec = Vector{Float32}(undef, d)
	        @inbounds for i in 1:d
	            src_idx = i - s
	            if src_idx < 1
	                src_idx += d
	            end
	            new_vec[i] = src[src_idx]
	        end
	        return Atom(SingleData(new_vec))

	    elseif atom.data isa BinaryData
	        # For binary: circular bit shift
	        chunks = atom.data.chunks
	        dim = atom.data.dim
	        # Nothing to rotate; also avoids mod(shift, 0), which throws
	        dim == 0 && return atom
	        s = mod(shift, dim)
	        s == 0 && return atom

	        # Bit-by-bit extract/pack. Chunk-level shifting would be faster, but
	        # this keeps the wrap-around at `dim` (not at a chunk boundary) simple.
	        n_chunks = length(chunks)
	        new_chunks = zeros(UInt64, n_chunks)

	        @inbounds for i in 1:dim
	            # Locate the source bit for output position i
	            src_idx = i - s
	            if src_idx < 1
	                src_idx += dim
	            end

	            sc_idx = ((src_idx - 1) ÷ 64) + 1
	            sb_idx = (src_idx - 1) % 64
	            bit = (chunks[sc_idx] >> sb_idx) & 1

	            if bit == 1
	                dc_idx = ((i - 1) ÷ 64) + 1
	                db_idx = (i - 1) % 64
	                # Set the destination bit (bitwise-or update)
	                new_chunks[dc_idx] |= UInt64(1) << db_idx
	            end
	        end
	        return Atom(BinaryData(new_chunks, dim))
	    end
	    return atom
	end

	# --- Schema Definition ---

	# Pairs a field name with the encoder used for that field.
	#   name    - the field's name
	#   encoder - any VSAEncoder subtype; the field is declared with the abstract
	#             type, so one FieldSchema type can hold any kind of encoder
	struct FieldSchema
	name::String
	encoder::VSAEncoder
	end