Updates
Browse files
app.py
CHANGED
|
@@ -122,6 +122,11 @@ def display_word_differences(x: str, y: str) -> str:
|
|
| 122 |
diff = ndiff(x.split(), y.split())
|
| 123 |
return " ".join([word for word in diff if word.startswith(("+", "-"))])
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
def perform_deduplication(
|
| 126 |
deduplication_type,
|
| 127 |
dataset1_name,
|
|
@@ -159,7 +164,8 @@ def perform_deduplication(
|
|
| 159 |
# Compute embeddings
|
| 160 |
status = "Computing embeddings for Dataset 1..."
|
| 161 |
yield status, ""
|
| 162 |
-
embedding_matrix =
|
|
|
|
| 163 |
# embedding_matrix = compute_embeddings(
|
| 164 |
# texts,
|
| 165 |
# batch_size=64,
|
|
|
|
| 122 |
diff = ndiff(x.split(), y.split())
|
| 123 |
return " ".join([word for word in diff if word.startswith(("+", "-"))])
|
| 124 |
|
| 125 |
+
|
| 126 |
+
def encode_texts(texts, progress=None):
|
| 127 |
+
embedding_matrix = model.encode(texts, show_progressbar=False)
|
| 128 |
+
return embedding_matrix
|
| 129 |
+
|
| 130 |
def perform_deduplication(
|
| 131 |
deduplication_type,
|
| 132 |
dataset1_name,
|
|
|
|
| 164 |
# Compute embeddings
|
| 165 |
status = "Computing embeddings for Dataset 1..."
|
| 166 |
yield status, ""
|
| 167 |
+
embedding_matrix = encode_texts(texts, progress=progress)
|
| 168 |
+
#embedding_matrix = model.encode(texts, show_progressbar=True)
|
| 169 |
# embedding_matrix = compute_embeddings(
|
| 170 |
# texts,
|
| 171 |
# batch_size=64,
|