Tom Aarsen committed on
Commit
cf19736
·
1 Parent(s): e8e8b51

Keep URL when filtering dataset; remove only id

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -10,7 +10,7 @@ import numpy as np
10
 
11
  # Load titles, texts, and int8 embeddings in a lazy Dataset, allowing us to efficiently access specific rows on demand
12
  # Note that we never actually use the int8 embeddings for search directly, they are only used for rescoring after the binary search
13
- title_text_int8_dataset = load_dataset("sentence-transformers/quantized-retrieval-data", split="train").select_columns(["title", "text", "embedding"])
14
  # title_text_int8_dataset = load_from_disk("wikipedia-mxbai-embed-int8-index").select_columns(["url", "title", "text", "embedding"])
15
 
16
  # Load the binary indices
 
10
 
11
  # Load titles, texts, and int8 embeddings in a lazy Dataset, allowing us to efficiently access specific rows on demand
12
  # Note that we never actually use the int8 embeddings for search directly, they are only used for rescoring after the binary search
13
+ title_text_int8_dataset = load_dataset("sentence-transformers/quantized-retrieval-data", split="train").select_columns(["url", "title", "text", "embedding"])
14
  # title_text_int8_dataset = load_from_disk("wikipedia-mxbai-embed-int8-index").select_columns(["url", "title", "text", "embedding"])
15
 
16
  # Load the binary indices