package semantic // Embedder is the interface for converting text into dense vector // representations. Implementations range from lightweight deterministic // hashing (DummyEmbedder) to real ML models (future LocalEmbedder, // OpenAIEmbedder). type Embedder interface { // Embed converts a batch of text strings into float32 vectors. // All returned vectors must have the same dimensionality. Embed(texts []string) ([][]float32, error) // Strategy returns the name of the embedding strategy (e.g. "dummy", "local", "openai"). Strategy() string } // DummyEmbedder generates deterministic fixed-dimension vectors using a // simple hash of each input string. Useful for architecture testing // without real ML dependencies. type DummyEmbedder struct { Dim int // vector dimensionality (default 64) } // NewDummyEmbedder creates a DummyEmbedder with the given dimensionality. func NewDummyEmbedder(dim int) *DummyEmbedder { if dim <= 0 { dim = 64 } return &DummyEmbedder{Dim: dim} } // Strategy returns "dummy". func (d *DummyEmbedder) Strategy() string { return "dummy" } // Embed generates deterministic pseudo-vectors by hashing each character // of the input string into the vector dimensions. The resulting vectors // have useful properties: identical strings produce identical vectors, // and strings with shared tokens produce vectors with non-zero cosine // similarity. func (d *DummyEmbedder) Embed(texts []string) ([][]float32, error) { result := make([][]float32, len(texts)) for i, text := range texts { result[i] = d.hashVec(text) } return result, nil } // hashVec produces a deterministic float32 vector from a string. func (d *DummyEmbedder) hashVec(s string) []float32 { vec := make([]float32, d.Dim) for i, c := range s { idx := (i*31 + int(c)) % d.Dim if idx < 0 { idx = -idx } vec[idx] += float32(c) / 128.0 } // Normalize to unit length for cosine similarity. var norm float64 for _, v := range vec { norm += float64(v) * float64(v) } if norm > 0 { invNorm := float32(1.0 / sqrt64(norm)) for j := range vec { vec[j] *= invNorm } } return vec } // sqrt64 is a simple float64 square root to avoid importing math in this file. func sqrt64(x float64) float64 { if x <= 0 { return 0 } // Newton's method, 20 iterations is ample for float64 precision. z := x / 2 for i := 0; i < 20; i++ { z = (z + x/z) / 2 } return z }