import math
import re
import zlib
from collections import Counter

import numpy as np


class SimpleEmbedder:
    """Bag-of-words text embedder based on the hashing trick.

    Each distinct word is mapped to one of ``vector_size`` buckets by a
    stable hash; the bucket accumulates that word's occurrence count and the
    resulting vector is scaled to unit L2 norm. Distinct words can share a
    bucket, because the bucket index is the hash modulo ``vector_size``.
    """

    # Matches every character that is neither a lowercase ASCII letter nor
    # whitespace. Compiled once because it is reused on every call.
    _NON_LETTER_RE = re.compile(r"[^a-z\s]")

    def __init__(self, vector_size=384):
        """Store the embedding dimensionality.

        Args:
            vector_size: Length of the vectors returned by
                ``create_embedding``.
        """
        self.vector_size = vector_size
        # Not read or written by any method in this class; kept so existing
        # attribute access on instances keeps working.
        self.word_vectors = {}

    def create_embedding(self, text: str) -> np.ndarray:
        """Create a simple embedding for text.

        The text is lowercased, every character other than ``a``-``z`` and
        whitespace is replaced by a space, and the result is split on
        whitespace into words.

        Args:
            text: Input text.

        Returns:
            A 1-D float array of length ``vector_size``. It has unit L2 norm
            when the cleaned text contains at least one word, and is all
            zeros otherwise.
        """
        cleaned = self._NON_LETTER_RE.sub(" ", text.lower())
        words = cleaned.split()

        embedding = np.zeros(self.vector_size)
        if not words:
            return embedding

        for word, count in Counter(words).items():
            # The builtin hash() of a str is salted per process
            # (PYTHONHASHSEED), so it would yield a different embedding on
            # every run. CRC32 of the UTF-8 bytes is stable across runs.
            bucket = zlib.crc32(word.encode("utf-8")) % self.vector_size
            embedding[bucket] += count

        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding