# Medical-RAG-Assistant / simple_embedding.py
# Commit d56acff (verified) by mshabir: "Create simple_embedding.py"
import math
import re
import zlib
from collections import Counter

import numpy as np
class SimpleEmbedder:
    """Bag-of-words text embedder based on the hashing trick.

    Every distinct word of the input is mapped to one of ``vector_size``
    buckets, the bucket accumulates that word's occurrence count, and the
    resulting vector is scaled to unit L2 norm.
    """

    # Any character that is neither a lowercase ASCII letter nor whitespace
    # is treated as a word separator.
    _NON_WORD = re.compile(r'[^a-z\s]')

    def __init__(self, vector_size=384):
        """Store the embedding dimensionality.

        Args:
            vector_size: Number of buckets (length of each returned vector).
        """
        self.vector_size = vector_size
        # Not read by create_embedding(); kept so existing attribute access
        # on instances keeps working.
        self.word_vectors = {}

    def create_embedding(self, text: str) -> np.ndarray:
        """Create a simple embedding for text.

        The text is lowercased, everything except ``a-z`` and whitespace is
        replaced by a space, and the remaining words are counted into hashed
        buckets.

        Args:
            text: Input text to embed.

        Returns:
            A float array of shape ``(vector_size,)``. It has unit L2 norm
            when the text contains at least one word, and is all zeros
            otherwise.
        """
        # Clean text
        words = self._NON_WORD.sub(' ', text.lower()).split()

        if not words:
            return np.zeros(self.vector_size)

        embedding = np.zeros(self.vector_size)
        for word, count in Counter(words).items():
            # The builtin hash() of a str is salted per interpreter process,
            # so it would place the same word in different buckets on
            # different runs. CRC32 of the UTF-8 bytes is stable everywhere.
            bucket = zlib.crc32(word.encode('utf-8')) % self.vector_size
            embedding[bucket] += count

        # Normalize
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding