Update text_splitter.py
Browse files
- text_splitter.py +4 -0
text_splitter.py
CHANGED
|
@@ -31,6 +31,10 @@ def activate_similarities(similarities:np.array, p_size=10)->np.array:
|
|
| 31 |
return activated_similarities
|
| 32 |
|
| 33 |
def split_into_paragraphs(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
activated_similarities = activate_similarities(similarities, p_size=5)
|
| 35 |
minmimas = argrelextrema(activated_similarities, np.less, order=2)
|
| 36 |
split_points = [each for each in minmimas[0]]
|
|
|
|
| 31 |
return activated_similarities
|
| 32 |
|
| 33 |
def split_into_paragraphs(text):
|
| 34 |
+
model = SentenceTransformer('all-mpnet-base-v2')
|
| 35 |
+
sentences = text.split('. ')
|
| 36 |
+
embeddings = model.encode(sentences)
|
| 37 |
+
similarities = cosine_similarity(embeddings)
|
| 38 |
activated_similarities = activate_similarities(similarities, p_size=5)
|
| 39 |
minmimas = argrelextrema(activated_similarities, np.less, order=2)
|
| 40 |
split_points = [each for each in minmimas[0]]
|