Spaces:
Sleeping
Sleeping
import math
import re
import zlib
from collections import Counter

import numpy as np
class SimpleEmbedder:
    """Hashed bag-of-words text embedder.

    Each distinct word is mapped to one of ``vector_size`` buckets with a
    stable hash, the word counts are accumulated per bucket, and the
    resulting vector is scaled to unit Euclidean length.
    """

    def __init__(self, vector_size=384):
        """Store the embedding dimensionality.

        Args:
            vector_size: Length of every vector returned by
                ``create_embedding``.
        """
        self.vector_size = vector_size
        # Not read by any method in this class; kept because external code
        # may access the attribute.
        self.word_vectors = {}

    def _bucket_index(self, word: str) -> int:
        """Return the bucket for *word*, identical in every process."""
        # The builtin hash() of a str is salted per interpreter process
        # (PYTHONHASHSEED), so it would place the same word in different
        # buckets on different runs.  CRC32 of the UTF-8 bytes is stable.
        return zlib.crc32(word.encode("utf-8")) % self.vector_size

    def create_embedding(self, text: str) -> np.ndarray:
        """Create a simple embedding for text.

        The text is lower-cased, every character other than ``a``-``z`` and
        whitespace is replaced by a space, and the remaining words are
        counted into hash buckets.

        Args:
            text: Input text.

        Returns:
            A 1-D array of length ``vector_size``.  It has unit L2 norm when
            the cleaned text contains at least one word and is all zeros
            otherwise.
        """
        # Clean text.  ``\s`` must be a single-backslash escape here: inside
        # a raw string ``\\s`` would mean "a literal backslash or the letter
        # s", which lets backslashes survive and glue words together.
        cleaned = re.sub(r'[^a-z\s]', ' ', text.lower())
        words = cleaned.split()
        if not words:
            return np.zeros(self.vector_size)

        # Accumulate word frequencies into their hash buckets; words that
        # collide share a bucket and their counts add up.
        embedding = np.zeros(self.vector_size)
        for word, count in Counter(words).items():
            embedding[self._bucket_index(word)] += count

        # Normalize to unit length.
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm
        return embedding