app.py CHANGED
@@ -11,7 +11,7 @@ import torch
 from threading import Thread
 
 token = os.environ["HF_TOKEN"]
-model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
+model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
     # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     torch_dtype=torch.float16,
     token=token)
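The commented-out line kept in context hints at a device-aware dtype fallback. A minimal sketch of that pattern, assuming the same checkpoint and HF_TOKEN setup; the tokenizer line is added for completeness and is not part of this commit:

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

token = os.environ["HF_TOKEN"]

# Use half precision only when a GPU is available; fp16 on CPU is slow
# and unsupported by many ops, so fall back to float32 there.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-7b-it",
    torch_dtype=dtype,
    token=token,
)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it", token=token)
```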
@@ -25,12 +25,9 @@ title_text_dataset = load_dataset(
 ).select_columns(["title", "text"])
 
 # Load the int8 and binary indices. Int8 is loaded as a view to save memory, as we never actually perform search with it.
-int8_view = Index.restore("
+int8_view = Index.restore("https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_int8_usearch_1m.index", view=True)
 binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(
-    "
-)
-binary_ivf: faiss.IndexBinaryIVF = faiss.read_index_binary(
-    "wikipedia_ubinary_ivf_faiss_50m.index"
+    "https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_ubinary_faiss_1m.index"
 )
 
 # Load the SentenceTransformer model for embedding the queries
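For background, the int8/binary pair follows the sentence-transformers quantized-retrieval recipe: documents are embedded once in float32, packed to binary for the search index, and scalar-quantized to int8 for rescoring. A sketch of how such index files could be produced; the embedding model name, the toy corpus, and the output filenames are assumptions, not taken from this commit:

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.quantization import quantize_embeddings
from usearch.index import Index

# Model and corpus are placeholders for illustration.
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
corpus = ["Anarchism is a political philosophy ...", "Machine learning is ..."]

float_embeddings = model.encode(corpus, normalize_embeddings=True)

# Pack each float32 vector into d/8 bytes for the exact binary faiss index.
ubinary = quantize_embeddings(float_embeddings, precision="ubinary")
binary_index = faiss.IndexBinaryFlat(float_embeddings.shape[1])
binary_index.add(ubinary)
faiss.write_index_binary(binary_index, "wikipedia_ubinary_faiss_1m.index")

# Scalar-quantize to int8 for the usearch index that is later memory-mapped
# (view=True) and only read during rescoring, never searched.
int8 = quantize_embeddings(float_embeddings, precision="int8")
int8_index = Index(ndim=float_embeddings.shape[1], metric="ip", dtype="i8")
int8_index.add(np.arange(len(corpus)), int8)
int8_index.save("wikipedia_int8_usearch_1m.index")
```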
@@ -55,7 +52,7 @@ def search(
     )
 
     # 3. Search the binary index (either exact or approximate)
-    index =
+    index = binary_index
     _scores, binary_ids = index.search(
         query_embedding_ubinary, top_k * rescore_multiplier
     )
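The removed line chose between the exact `binary_index` and the approximate `binary_ivf`; with the IVF index gone, the exact index is the only path. A sketch of the surrounding search-and-rescore flow under the same names (`query_embedding_ubinary`, `top_k * rescore_multiplier`), reusing `model`, `binary_index`, and `int8_view` from the sketch above; the exact scoring and return shape are assumptions:

```python
from sentence_transformers.quantization import quantize_embeddings

def search(query: str, top_k: int = 10, rescore_multiplier: int = 4):
    """Exact binary search, then int8 rescoring (a sketch, not the Space's exact code)."""
    # 1. Embed the query in float32.
    query_embedding = model.encode(query)

    # 2. Quantize to packed unsigned binary for the faiss binary index.
    query_embedding_ubinary = quantize_embeddings(
        query_embedding.reshape(1, -1), precision="ubinary"
    )

    # 3. Over-retrieve candidates from the exact binary index.
    index = binary_index
    _scores, binary_ids = index.search(
        query_embedding_ubinary, top_k * rescore_multiplier
    )
    binary_ids = binary_ids[0]

    # 4. Rescore candidates: float query against int8 document vectors
    #    read from the memory-mapped usearch view.
    int8_embeddings = int8_view[binary_ids].astype(int)
    scores = query_embedding @ int8_embeddings.T

    # 5. Keep the best top_k after rescoring.
    order = scores.argsort()[::-1][:top_k]
    return [(float(scores[i]), int(binary_ids[i])) for i in order]
```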
@@ -156,6 +153,6 @@ the models used in this space are :
 demo = gr.ChatInterface(fn=talk,
     chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
     theme="Soft",
-    examples=[["
+    examples=[["what is machine learning"]],
     title="Text Streaming")
 demo.launch()
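`from threading import Thread` in the first hunk and the "Text Streaming" title suggest `talk` streams tokens with `TextIteratorStreamer`. A minimal sketch of such a handler, assuming the `model` and a matching `tokenizer` from earlier; history handling and generation settings are simplified assumptions:

```python
from threading import Thread

from transformers import TextIteratorStreamer

def talk(message, history):
    # Tokenize the user message (chat history handling omitted for brevity).
    inputs = tokenizer(message, return_tensors="pt").to(model.device)

    # Run generation on a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
    )
    thread.start()

    partial = ""
    for token_text in streamer:
        partial += token_text
        yield partial  # gr.ChatInterface re-renders the growing reply
```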
|