Tom Aarsen committed on
Commit ·
cf19736
1
Parent(s): e8e8b51
Keep URL when filtering dataset; remove only id
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ import numpy as np
|
|
| 10 |
|
| 11 |
# Load titles, texts, and int8 embeddings in a lazy Dataset, allowing us to efficiently access specific rows on demand
|
| 12 |
# Note that we never actually use the int8 embeddings for search directly, they are only used for rescoring after the binary search
|
| 13 |
-
title_text_int8_dataset = load_dataset("sentence-transformers/quantized-retrieval-data", split="train").select_columns(["title", "text", "embedding"])
|
| 14 |
# title_text_int8_dataset = load_from_disk("wikipedia-mxbai-embed-int8-index").select_columns(["url", "title", "text", "embedding"])
|
| 15 |
|
| 16 |
# Load the binary indices
|
|
|
|
| 10 |
|
| 11 |
# Load titles, texts, and int8 embeddings in a lazy Dataset, allowing us to efficiently access specific rows on demand
|
| 12 |
# Note that we never actually use the int8 embeddings for search directly, they are only used for rescoring after the binary search
|
| 13 |
+
title_text_int8_dataset = load_dataset("sentence-transformers/quantized-retrieval-data", split="train").select_columns(["url", "title", "text", "embedding"])
|
| 14 |
# title_text_int8_dataset = load_from_disk("wikipedia-mxbai-embed-int8-index").select_columns(["url", "title", "text", "embedding"])
|
| 15 |
|
| 16 |
# Load the binary indices
|