trojkat committed on
Commit
54cc7a5
·
verified ·
1 Parent(s): 894aa8c

Update text_splitter.py

Browse files
Files changed (1) hide show
  1. text_splitter.py +4 -0
text_splitter.py CHANGED
@@ -31,6 +31,10 @@ def activate_similarities(similarities:np.array, p_size=10)->np.array:
31
  return activated_similarities
32
 
33
  def split_into_paragraphs(text):
 
 
 
 
34
  activated_similarities = activate_similarities(similarities, p_size=5)
35
  minmimas = argrelextrema(activated_similarities, np.less, order=2)
36
  split_points = [each for each in minmimas[0]]
 
31
  return activated_similarities
32
 
33
  def split_into_paragraphs(text):
34
+ model = SentenceTransformer('all-mpnet-base-v2')
35
+ sentences = text.split('. ')
36
+ embeddings = model.encode(sentences)
37
+ similarities = cosine_similarity(embeddings)
38
  activated_similarities = activate_similarities(similarities, p_size=5)
39
  minmimas = argrelextrema(activated_similarities, np.less, order=2)
40
  split_points = [each for each in minmimas[0]]