Spaces:

cnmoro
/

SemanticCompression

Running

cnmoro committed on May 27, 2024

Commit

b76aadb

verified ·

1 Parent(s): 7838009

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -102,7 +102,17 @@ def extract_embeddings(text):
 def extract_embeddings_batch(texts):
     """Return a list with the result of ``extract_embeddings`` for each item of ``texts``, in order."""
     embeddings = []
     for item in texts:
         embeddings.append(extract_embeddings(item))
     return embeddings
-def compress_semantically(input_text, word_reduction_factor=0.35, num_samples=500):
     semantic_embeddings = extract_embeddings(input_text)
     text_lang = detect_language_en_pt(input_text)
     stopwords = en_stop_words if text_lang == 'en' else pt_stop_words

 def extract_embeddings_batch(texts):
     """Return a list with the result of ``extract_embeddings`` for each item of ``texts``, in order."""
     embeddings = []
     for item in texts:
         embeddings.append(extract_embeddings(item))
     return embeddings
+def compress_semantically(input_text, word_reduction_factor=0.35):
+    num_samples = 500
+    word_count = input_text.split()
+    thresholds = [(1500, 80), (1000, 90), (700, 110), (500, 130), (250, 160)]
+    for threshold, value in thresholds:
+        if word_count > threshold:
+            num_samples = value
+            break
     semantic_embeddings = extract_embeddings(input_text)
     text_lang = detect_language_en_pt(input_text)
     stopwords = en_stop_words if text_lang == 'en' else pt_stop_words