app.py CHANGED
@@ -11,7 +11,7 @@ import torch
 from threading import Thread
 
 token = os.environ["HF_TOKEN"]
-model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
+model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
     # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     torch_dtype=torch.float16,
     token=token)
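The commented-out line kept in context hints at a device-aware dtype fallback. A minimal sketch of that pattern, assuming the same checkpoint and HF_TOKEN setup; the tokenizer line is added for completeness and is not part of this commit:

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

token = os.environ["HF_TOKEN"]

# Use half precision only when a GPU is available; fp16 on CPU is slow
# and unsupported by many ops, so fall back to float32 there.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-7b-it",
    torch_dtype=dtype,
    token=token,
)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it", token=token)
```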
@@ -25,12 +25,9 @@ title_text_dataset = load_dataset(
 ).select_columns(["title", "text"])
 
 # Load the int8 and binary indices. Int8 is loaded as a view to save memory, as we never actually perform search with it.
-int8_view = Index.restore("
+int8_view = Index.restore("https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_int8_usearch_1m.index", view=True)
 binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(
-    "
-)
-binary_ivf: faiss.IndexBinaryIVF = faiss.read_index_binary(
-    "wikipedia_ubinary_ivf_faiss_50m.index"
+    "https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_ubinary_faiss_1m.index"
 )
 
 # Load the SentenceTransformer model for embedding the queries
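For background, the int8/binary pair follows the sentence-transformers quantized-retrieval recipe: documents are embedded once in float32, packed to binary for the search index, and scalar-quantized to int8 for rescoring. A sketch of how such index files could be produced; the embedding model name, the toy corpus, and the output filenames are assumptions, not taken from this commit:

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.quantization import quantize_embeddings
from usearch.index import Index

# Model and corpus are placeholders for illustration.
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
corpus = ["Anarchism is a political philosophy ...", "Machine learning is ..."]

float_embeddings = model.encode(corpus, normalize_embeddings=True)

# Pack each float32 vector into d/8 bytes for the exact binary faiss index.
ubinary = quantize_embeddings(float_embeddings, precision="ubinary")
binary_index = faiss.IndexBinaryFlat(float_embeddings.shape[1])
binary_index.add(ubinary)
faiss.write_index_binary(binary_index, "wikipedia_ubinary_faiss_1m.index")

# Scalar-quantize to int8 for the usearch index that is later memory-mapped
# (view=True) and only read during rescoring, never searched.
int8 = quantize_embeddings(float_embeddings, precision="int8")
int8_index = Index(ndim=float_embeddings.shape[1], metric="ip", dtype="i8")
int8_index.add(np.arange(len(corpus)), int8)
int8_index.save("wikipedia_int8_usearch_1m.index")
```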
@@ -55,7 +52,7 @@ def search(
     )
 
     # 3. Search the binary index (either exact or approximate)
-    index =
+    index = binary_index
     _scores, binary_ids = index.search(
         query_embedding_ubinary, top_k * rescore_multiplier
     )
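The removed line chose between the exact `binary_index` and the approximate `binary_ivf`; with the IVF index gone, the exact index is the only path. A sketch of the surrounding search-and-rescore flow under the same names (`query_embedding_ubinary`, `top_k * rescore_multiplier`), reusing `model`, `binary_index`, and `int8_view` from the sketch above; the exact scoring and return shape are assumptions:

```python
from sentence_transformers.quantization import quantize_embeddings

def search(query: str, top_k: int = 10, rescore_multiplier: int = 4):
    """Exact binary search, then int8 rescoring (a sketch, not the Space's exact code)."""
    # 1. Embed the query in float32.
    query_embedding = model.encode(query)

    # 2. Quantize to packed unsigned binary for the faiss binary index.
    query_embedding_ubinary = quantize_embeddings(
        query_embedding.reshape(1, -1), precision="ubinary"
    )

    # 3. Over-retrieve candidates from the exact binary index.
    index = binary_index
    _scores, binary_ids = index.search(
        query_embedding_ubinary, top_k * rescore_multiplier
    )
    binary_ids = binary_ids[0]

    # 4. Rescore candidates: float query against int8 document vectors
    #    read from the memory-mapped usearch view.
    int8_embeddings = int8_view[binary_ids].astype(int)
    scores = query_embedding @ int8_embeddings.T

    # 5. Keep the best top_k after rescoring.
    order = scores.argsort()[::-1][:top_k]
    return [(float(scores[i]), int(binary_ids[i])) for i in order]
```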
@@ -156,6 +153,6 @@ the models used in this space are :
 demo = gr.ChatInterface(fn=talk,
     chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
     theme="Soft",
-    examples=[["
+    examples=[["what is machine learning"]],
     title="Text Streaming")
 demo.launch()
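`from threading import Thread` in the first hunk and the "Text Streaming" title suggest `talk` streams tokens with `TextIteratorStreamer`. A minimal sketch of such a handler, assuming the `model` and a matching `tokenizer` from earlier; history handling and generation settings are simplified assumptions:

```python
from threading import Thread

from transformers import TextIteratorStreamer

def talk(message, history):
    # Tokenize the user message (chat history handling omitted for brevity).
    inputs = tokenizer(message, return_tensors="pt").to(model.device)

    # Run generation on a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
    )
    thread.start()

    partial = ""
    for token_text in streamer:
        partial += token_text
        yield partial  # gr.ChatInterface re-renders the growing reply
```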
|