Spaces:
Running
on
Zero
Running
on
Zero
Liam Dyer
committed on
idk if i could do this less efficiently
Browse files
app.py
CHANGED
|
@@ -153,7 +153,10 @@ def predict(queries, documents, max_characters) -> list[list[str]]:
|
|
| 153 |
# Getting a structure like [[chunk, ...]]
|
| 154 |
document_embeddings = [[] for _ in range(len(documents))]
|
| 155 |
total_chars = 0
|
| 156 |
-
while
|
|
|
|
|
|
|
|
|
|
| 157 |
for query, doc_scores in query_embeddings.items():
|
| 158 |
if len(doc_scores) == 0:
|
| 159 |
continue
|
|
@@ -176,6 +179,12 @@ def predict(queries, documents, max_characters) -> list[list[str]]:
|
|
| 176 |
document_embeddings[doc_idx].append(chunk_idx)
|
| 177 |
total_chars += len(chunk)
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
return document_embeddings
|
| 180 |
|
| 181 |
|
|
|
|
| 153 |
# Getting a structure like [[chunk, ...]]
|
| 154 |
document_embeddings = [[] for _ in range(len(documents))]
|
| 155 |
total_chars = 0
|
| 156 |
+
while (
|
| 157 |
+
total_chars < max_characters
|
| 158 |
+
and sum([len(x) for x in query_embeddings.values()]) > 0
|
| 159 |
+
):
|
| 160 |
for query, doc_scores in query_embeddings.items():
|
| 161 |
if len(doc_scores) == 0:
|
| 162 |
continue
|
|
|
|
| 179 |
document_embeddings[doc_idx].append(chunk_idx)
|
| 180 |
total_chars += len(chunk)
|
| 181 |
|
| 182 |
+
# Get the actual text for the chunks
|
| 183 |
+
document_embeddings = [
|
| 184 |
+
[chunked_docs[doc_idx][chunk_idx] for chunk_idx in chunks]
|
| 185 |
+
for doc_idx, chunks in enumerate(document_embeddings)
|
| 186 |
+
]
|
| 187 |
+
|
| 188 |
return document_embeddings
|
| 189 |
|
| 190 |
|