mshabir committed on
Commit
d56acff
·
verified ·
1 Parent(s): e13bd2f

Create simple_embedding.py

Browse files
Files changed (1) hide show
  1. simple_embedding.py +37 -0
simple_embedding.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import re
3
+ from collections import Counter
4
+ import math
5
+
6
class SimpleEmbedder:
    """Bag-of-words text embedder based on feature hashing.

    Every distinct word is mapped to one of ``vector_size`` buckets, the
    bucket accumulates that word's occurrence count, and the resulting vector
    is scaled to unit L2 norm. No vocabulary is learned, so unrelated words
    may land in the same bucket.
    """

    # Any character that is not an ASCII lowercase letter or whitespace is
    # treated as a word separator.
    _NON_LETTER = re.compile(r"[^a-z\s]")

    def __init__(self, vector_size=384):
        """Store the embedding dimensionality.

        Args:
            vector_size: Length of every vector returned by
                ``create_embedding``.
        """
        self.vector_size = vector_size
        # Not read by create_embedding; kept because it is a public attribute.
        self.word_vectors = {}

    def create_embedding(self, text: str) -> np.ndarray:
        """Create a simple hashed bag-of-words embedding for ``text``.

        The text is lower-cased, every character other than ``a``-``z`` and
        whitespace is replaced by a space, and the remaining words are
        counted into hash buckets.

        Args:
            text: Input text.

        Returns:
            A 1-D float array of length ``vector_size``. It has unit L2 norm
            when the text contains at least one word, and is all zeros
            otherwise.
        """
        # Local import keeps this block self-contained without touching the
        # file's import section.
        import zlib

        words = self._NON_LETTER.sub(" ", text.lower()).split()

        embedding = np.zeros(self.vector_size)
        if not words:
            return embedding

        for word, count in Counter(words).items():
            # The built-in hash() of a str is salted per interpreter process
            # (PYTHONHASHSEED), so it would place the same word in different
            # buckets on different runs. CRC32 of the UTF-8 bytes is stable
            # across processes and machines.
            bucket = zlib.crc32(word.encode("utf-8")) % self.vector_size
            embedding[bucket] += count

        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        return embedding