Serbian
procesaur committed on
Commit
edf64d3
·
verified ·
1 Parent(s): 0c7f2e5

Upload 4 files

Browse files
glov.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Query GloVe word vectors through gensim's KeyedVectors.

The script has two stages:

1. An optional, normally disabled conversion that reads the joblib-pickled
   model "glove_batch.model" and re-saves its vectors as
   "glove_keyed_vectors.kv".
2. A query stage that loads the saved KeyedVectors file, prints the five
   nearest neighbours of one word and the distance between two words.
"""
import gensim
import joblib
from gensim.models import KeyedVectors

# Flip to True to rebuild "glove_keyed_vectors.kv" from the full model.
# Left off by default, so only the query stage below runs.
REBUILD_KEYED_VECTORS = False

if REBUILD_KEYED_VECTORS:
    # Load full model (if memory allows).
    full_model = joblib.load("glove_batch.model")

    # Convert to the KeyedVectors format.
    # NOTE(review): words are taken in dict iteration order and paired
    # positionally with the rows of "word_vectors" — presumably that order
    # matches the row indices; confirm against how the model was built.
    vocabulary = list(full_model["dictionary"].keys())
    embeddings = list(full_model["word_vectors"])
    keyed = KeyedVectors(vector_size=full_model["no_components"])
    keyed.add_vectors(vocabulary, embeddings)

    # Save the KeyedVectors file.
    keyed.save("glove_keyed_vectors.kv")
    print("Saved as KeyedVectors! Now reload with less memory.")


# --- Query stage: work only with the saved KeyedVectors file ---
vectors = KeyedVectors.load("glove_keyed_vectors.kv")

# Five nearest neighbours of a single word.
neighbours = vectors.most_similar("sistema", topn=5)
print(neighbours)

# Distance between two comma-separated words; whitespace around each word
# is stripped before lookup.
text = "posao,raditi"
first_word, second_word = map(str.strip, text.split(","))
print(vectors.distance(first_word, second_word))
glove_batch.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:527abf4964081ec00837d2adad03838fe6628771978c6ecd1c6ee6bf013c11af
3
- size 12040861007
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2676969b75a391484922bce7490dd653346293a4bb732f127f76a81bdf802fcb
3
+ size 12332693044
glove_keyed_vectors.kv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c290229c2f9794d89b8b477d87eaefc869295d488637ce1e9da9480bfda6a86b
3
- size 171106218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d67c8f4b7187ec0fe59a5c56641c079e3d43b51990eb5a378ab87685de0606
3
+ size 177288751
glove_keyed_vectors.kv.vectors.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77ce92596685ccf8815b832aa87a5ba7666df80097e31af331dafe7a9d89f76
3
- size 2929026928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f349b5941fd29848e062b0bb0f1077c3d1f56c1b678116676f8c7b93948741c1
3
+ size 2999514128