Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -102,7 +102,17 @@ def extract_embeddings(text):
|
|
| 102 |
def extract_embeddings_batch(texts):
|
| 103 |
return [extract_embeddings(text) for text in texts]
|
| 104 |
|
| 105 |
-
def compress_semantically(input_text, word_reduction_factor=0.35
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
semantic_embeddings = extract_embeddings(input_text)
|
| 107 |
text_lang = detect_language_en_pt(input_text)
|
| 108 |
stopwords = en_stop_words if text_lang == 'en' else pt_stop_words
|
|
|
|
| 102 |
def extract_embeddings_batch(texts):
|
| 103 |
return [extract_embeddings(text) for text in texts]
|
| 104 |
|
| 105 |
+
def compress_semantically(input_text, word_reduction_factor=0.35):
|
| 106 |
+
|
| 107 |
+
num_samples = 500
|
| 108 |
+
word_count = input_text.split()
|
| 109 |
+
|
| 110 |
+
thresholds = [(1500, 80), (1000, 90), (700, 110), (500, 130), (250, 160)]
|
| 111 |
+
for threshold, value in thresholds:
|
| 112 |
+
if word_count > threshold:
|
| 113 |
+
num_samples = value
|
| 114 |
+
break
|
| 115 |
+
|
| 116 |
semantic_embeddings = extract_embeddings(input_text)
|
| 117 |
text_lang = detect_language_en_pt(input_text)
|
| 118 |
stopwords = en_stop_words if text_lang == 'en' else pt_stop_words
|