Buckets:

rtrm's picture
download
raw
25.7 kB
/**
 * Compiled (minified) Svelte page chunk for the "Search index" guide
 * (FAISS / Elasticsearch sections).
 *
 * Generated build output: the authoritative text lives in the page source
 * referenced by the `source` prop near the end of the fragment factory. The
 * single-letter helpers are minified aliases imported from the runtime
 * chunks; their exact semantics are not visible in this file, so the notes
 * below only describe what this module itself does with them.
 *
 * NOTE(review): the hard line breaks that had been injected inside string
 * literals and inside the identifier `cs` made this module unparseable; they
 * are rejoined here. Line breaks inside the back-tick `highlighted` templates
 * are part of the rendered code samples and are kept verbatim.
 */
import{s as ct,n as rt,o as ot}from"../chunks/scheduler.d75c11ed.js";
import{S as mt,i as dt,e as p,s as l,c,h as ht,a as i,d as a,b as n,f as it,g as r,j as o,k as J,l as ut,m as e,n as m,t as d,o as h,p as u}from"../chunks/index.4ec9dfe9.js";
import{C as gt,H as zs,E as Jt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ee0f129e.js";
import{C as g}from"../chunks/CodeBlock.5919a092.js";

/**
 * Fragment factory for the page.
 *
 * Declares one handle per DOM node / child component plus the static
 * HTML/text for each paragraph and list, instantiates the child components,
 * and returns an object of lifecycle methods (`c`, `l`, `h`, `m`, `p`, `i`,
 * `o`, `d`).
 *
 * @param {*} Qs - unused by this fragment (the page has no reactive state).
 * @returns {object} the lifecycle-method object described above.
 */
function yt(Qs){
  // Node handles, spacer handles and static content strings, in document order.
  let y,as,ss,es,U,ls,X,ns,
    k,Es='<a href="https://github.com/facebookresearch/faiss" rel="nofollow">FAISS</a> and <a href="https://www.elastic.co/elasticsearch/" rel="nofollow">Elasticsearch</a> enables searching for examples in a dataset. This can be useful when you want to retrieve specific examples from a dataset that are relevant to your NLP task. For example, if you are working on an Open Domain Question Answering task, you may want to only return examples that are relevant to answering your question.',
    ps,R,Ys="This guide will show you how to build an index for your dataset that will allow you to search it.",
    is,W,cs,
    $,Bs='FAISS retrieves documents based on the similarity of their vector representations. In this example, you will generate the vector representations with the <a href="https://huggingface.co/transformers/model_doc/dpr.html" rel="nofollow">DPR</a> model.',
    rs,q,Hs="<li>Download the DPR model from 🤗 Transformers:</li>",
    os,F,ms,
    M,Ls="<li>Load your dataset and compute the vector representations:</li>",
    ds,N,hs,
    j,Ds='<li>Create the index with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.add_faiss_index">Dataset.add_faiss_index()</a>:</li>',
    us,C,gs,
    f,Ss='<li>Now you can query your dataset with the <code>embeddings</code> index. Load the DPR Question Encoder, and search for a question with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.get_nearest_examples">Dataset.get_nearest_examples()</a>:</li>',
    Js,G,ys,
    T,As='<li>You can access the index with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.get_index">Dataset.get_index()</a> and use it for special operations, e.g. query it using <code>range_search</code>:</li>',
    Ms,v,js,
    w,Ps='<li>When you are done querying, save the index on disk with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.save_faiss_index">Dataset.save_faiss_index()</a>:</li>',
    fs,V,Ts,
    _,Ks='<li>Reload it at a later time with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.load_faiss_index">Dataset.load_faiss_index()</a>:</li>',
    ws,z,_s,Q,bs,
    E,Os="Unlike FAISS, Elasticsearch retrieves documents based on exact matches.",
    xs,Y,st='Start Elasticsearch on your machine, or see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/setup.html" rel="nofollow">Elasticsearch installation guide</a> if you don’t already have it installed.',
    Zs,B,tt="<li>Load the dataset you want to index:</li>",
    Is,H,Us,
    b,at='<li>Build the index with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.add_elasticsearch_index">Dataset.add_elasticsearch_index()</a>:</li>',
    Xs,L,ks,
    x,et='<li>Then you can query the <code>context</code> index with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.get_nearest_examples">Dataset.get_nearest_examples()</a>:</li>',
    Rs,D,Ws,
    Z,lt="<li>If you want to reuse the index, define the <code>es_index_name</code> parameter when you build the index:</li>",
    $s,S,qs,
    I,nt='<li>Reload it later with the index name when you call <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.load_elasticsearch_index">Dataset.load_elasticsearch_index()</a>:</li>',
    Fs,A,Ns,
    P,pt="For more advanced Elasticsearch usage, you can specify your own configuration with custom settings:",
    Cs,K,Gs,O,vs,ts,Vs;

  // Child components. Each `g` instance receives a `code` payload (base64
  // text) and the pre-highlighted HTML for the same snippet.
  return U=new gt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),
    X=new zs({props:{title:"Search index",local:"search-index",headingTag:"h1"}}),
    W=new zs({props:{title:"FAISS",local:"faiss",headingTag:"h2"}}),
    F=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERQUkNvbnRleHRFbmNvZGVyJTJDJTIwRFBSQ29udGV4dEVuY29kZXJUb2tlbml6ZXIlMEFpbXBvcnQlMjB0b3JjaCUwQXRvcmNoLnNldF9ncmFkX2VuYWJsZWQoRmFsc2UpJTBBY3R4X2VuY29kZXIlMjAlM0QlMjBEUFJDb250ZXh0RW5jb2Rlci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkcHItY3R4X2VuY29kZXItc2luZ2xlLW5xLWJhc2UlMjIpJTBBY3R4X3Rva2VuaXplciUyMCUzRCUyMERQUkNvbnRleHRFbmNvZGVyVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRmRwci1jdHhfZW5jb2Rlci1zaW5nbGUtbnEtYmFzZSUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DPRContextEncoder, DPRContextEncoderTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>torch.set_grad_enabled(<span class="hljs-literal">False</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>ctx_encoder = DPRContextEncoder.from_pretrained(<span class="hljs-string">&quot;facebook/dpr-ctx_encoder-single-nq-base&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>ctx_tokenizer = DPRContextEncoderTokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/dpr-ctx_encoder-single-nq-base&quot;</span>)`,wrap:!1}}),
    N=new g({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2NvbW11bml0eS1kYXRhc2V0cyUyRmNyaW1lX2FuZF9wdW5pc2gnJTJDJTIwc3BsaXQlM0QndHJhaW4lNUIlM0ExMDAlNUQnKSUwQWRzX3dpdGhfZW1iZWRkaW5ncyUyMCUzRCUyMGRzLm1hcChsYW1iZGElMjBleGFtcGxlJTNBJTIwJTdCJ2VtYmVkZGluZ3MnJTNBJTIwY3R4X2VuY29kZXIoKipjdHhfdG9rZW5pemVyKGV4YW1wbGUlNUIlMjJsaW5lJTIyJTVEJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMikpJTVCMCU1RCU1QjAlNUQubnVtcHkoKSU3RCk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>ds = load_dataset(<span class="hljs-string">&#x27;community-datasets/crime_and_punish&#x27;</span>, split=<span class="hljs-string">&#x27;train[:100]&#x27;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>ds_with_embeddings = ds.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> example: {<span class="hljs-string">&#x27;embeddings&#x27;</span>: ctx_encoder(**ctx_tokenizer(example[<span class="hljs-string">&quot;line&quot;</span>], return_tensors=<span class="hljs-string">&quot;pt&quot;</span>))[<span class="hljs-number">0</span>][<span class="hljs-number">0</span>].numpy()})`,wrap:!1}}),
    C=new g({props:{code:"ZHNfd2l0aF9lbWJlZGRpbmdzLmFkZF9mYWlzc19pbmRleChjb2x1bW4lM0QnZW1iZWRkaW5ncycp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>ds_with_embeddings.add_faiss_index(column=<span class="hljs-string">&#x27;embeddings&#x27;</span>)',wrap:!1}}),
    G=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERQUlF1ZXN0aW9uRW5jb2RlciUyQyUyMERQUlF1ZXN0aW9uRW5jb2RlclRva2VuaXplciUwQXFfZW5jb2RlciUyMCUzRCUyMERQUlF1ZXN0aW9uRW5jb2Rlci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkcHItcXVlc3Rpb25fZW5jb2Rlci1zaW5nbGUtbnEtYmFzZSUyMiklMEFxX3Rva2VuaXplciUyMCUzRCUyMERQUlF1ZXN0aW9uRW5jb2RlclRva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkcHItcXVlc3Rpb25fZW5jb2Rlci1zaW5nbGUtbnEtYmFzZSUyMiklMEElMEFxdWVzdGlvbiUyMCUzRCUyMCUyMklzJTIwaXQlMjBzZXJpb3VzJTIwJTNGJTIyJTBBcXVlc3Rpb25fZW1iZWRkaW5nJTIwJTNEJTIwcV9lbmNvZGVyKCoqcV90b2tlbml6ZXIocXVlc3Rpb24lMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSklNUIwJTVEJTVCMCU1RC5udW1weSgpJTBBc2NvcmVzJTJDJTIwcmV0cmlldmVkX2V4YW1wbGVzJTIwJTNEJTIwZHNfd2l0aF9lbWJlZGRpbmdzLmdldF9uZWFyZXN0X2V4YW1wbGVzKCdlbWJlZGRpbmdzJyUyQyUyMHF1ZXN0aW9uX2VtYmVkZGluZyUyQyUyMGslM0QxMCklMEFyZXRyaWV2ZWRfZXhhbXBsZXMlNUIlMjJsaW5lJTIyJTVEJTVCMCU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DPRQuestionEncoder, DPRQuestionEncoderTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>q_encoder = DPRQuestionEncoder.from_pretrained(<span class="hljs-string">&quot;facebook/dpr-question_encoder-single-nq-base&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/dpr-question_encoder-single-nq-base&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>question = <span class="hljs-string">&quot;Is it serious ?&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>question_embedding = q_encoder(**q_tokenizer(question, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>))[<span class="hljs-number">0</span>][<span class="hljs-number">0</span>].numpy()
<span class="hljs-meta">&gt;&gt;&gt; </span>scores, retrieved_examples = ds_with_embeddings.get_nearest_examples(<span class="hljs-string">&#x27;embeddings&#x27;</span>, question_embedding, k=<span class="hljs-number">10</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>retrieved_examples[<span class="hljs-string">&quot;line&quot;</span>][<span class="hljs-number">0</span>]
<span class="hljs-string">&#x27;_that_ serious? It is not serious at all. It’s simply a fantasy to amuse\\r\\n&#x27;</span>`,wrap:!1}}),
    v=new g({props:{code:"ZmFpc3NfaW5kZXglMjAlM0QlMjBkc193aXRoX2VtYmVkZGluZ3MuZ2V0X2luZGV4KCdlbWJlZGRpbmdzJykuZmFpc3NfaW5kZXglMEFsaW1pdHMlMkMlMjBkaXN0YW5jZXMlMkMlMjBpbmRpY2VzJTIwJTNEJTIwZmFpc3NfaW5kZXgucmFuZ2Vfc2VhcmNoKHglM0RxdWVzdGlvbl9lbWJlZGRpbmcucmVzaGFwZSgxJTJDJTIwLTEpJTJDJTIwdGhyZXNoJTNEMC45NSk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>faiss_index = ds_with_embeddings.get_index(<span class="hljs-string">&#x27;embeddings&#x27;</span>).faiss_index
<span class="hljs-meta">&gt;&gt;&gt; </span>limits, distances, indices = faiss_index.range_search(x=question_embedding.reshape(<span class="hljs-number">1</span>, -<span class="hljs-number">1</span>), thresh=<span class="hljs-number">0.95</span>)`,wrap:!1}}),
    V=new g({props:{code:"ZHNfd2l0aF9lbWJlZGRpbmdzLnNhdmVfZmFpc3NfaW5kZXgoJ2VtYmVkZGluZ3MnJTJDJTIwJ215X2luZGV4LmZhaXNzJyk=",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>ds_with_embeddings.save_faiss_index(<span class="hljs-string">&#x27;embeddings&#x27;</span>, <span class="hljs-string">&#x27;my_index.faiss&#x27;</span>)',wrap:!1}}),
    z=new g({props:{code:"ZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2NvbW11bml0eS1kYXRhc2V0cyUyRmNyaW1lX2FuZF9wdW5pc2gnJTJDJTIwc3BsaXQlM0QndHJhaW4lNUIlM0ExMDAlNUQnKSUwQWRzLmxvYWRfZmFpc3NfaW5kZXgoJ2VtYmVkZGluZ3MnJTJDJTIwJ215X2luZGV4LmZhaXNzJyk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>ds = load_dataset(<span class="hljs-string">&#x27;community-datasets/crime_and_punish&#x27;</span>, split=<span class="hljs-string">&#x27;train[:100]&#x27;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>ds.load_faiss_index(<span class="hljs-string">&#x27;embeddings&#x27;</span>, <span class="hljs-string">&#x27;my_index.faiss&#x27;</span>)`,wrap:!1}}),
    Q=new zs({props:{title:"Elasticsearch",local:"elasticsearch",headingTag:"h2"}}),
    H=new g({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBc3F1YWQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ3JhanB1cmthciUyRnNxdWFkJyUyQyUyMHNwbGl0JTNEJ3ZhbGlkYXRpb24nKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>squad = load_dataset(<span class="hljs-string">&#x27;rajpurkar/squad&#x27;</span>, split=<span class="hljs-string">&#x27;validation&#x27;</span>)`,wrap:!1}}),
    L=new g({props:{code:"c3F1YWQuYWRkX2VsYXN0aWNzZWFyY2hfaW5kZXgoJTIyY29udGV4dCUyMiUyQyUyMGhvc3QlM0QlMjJsb2NhbGhvc3QlMjIlMkMlMjBwb3J0JTNEJTIyOTIwMCUyMik=",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>squad.add_elasticsearch_index(<span class="hljs-string">&quot;context&quot;</span>, host=<span class="hljs-string">&quot;localhost&quot;</span>, port=<span class="hljs-string">&quot;9200&quot;</span>)',wrap:!1}}),
    D=new g({props:{code:"cXVlcnklMjAlM0QlMjAlMjJtYWNoaW5lJTIyJTBBc2NvcmVzJTJDJTIwcmV0cmlldmVkX2V4YW1wbGVzJTIwJTNEJTIwc3F1YWQuZ2V0X25lYXJlc3RfZXhhbXBsZXMoJTIyY29udGV4dCUyMiUyQyUyMHF1ZXJ5JTJDJTIwayUzRDEwKSUwQXJldHJpZXZlZF9leGFtcGxlcyU1QiUyMnRpdGxlJTIyJTVEJTVCMCU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>query = <span class="hljs-string">&quot;machine&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>scores, retrieved_examples = squad.get_nearest_examples(<span class="hljs-string">&quot;context&quot;</span>, query, k=<span class="hljs-number">10</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>retrieved_examples[<span class="hljs-string">&quot;title&quot;</span>][<span class="hljs-number">0</span>]
<span class="hljs-string">&#x27;Computational_complexity_theory&#x27;</span>`,wrap:!1}}),
    S=new g({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBc3F1YWQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ3JhanB1cmthciUyRnNxdWFkJyUyQyUyMHNwbGl0JTNEJ3ZhbGlkYXRpb24nKSUwQXNxdWFkLmFkZF9lbGFzdGljc2VhcmNoX2luZGV4KCUyMmNvbnRleHQlMjIlMkMlMjBob3N0JTNEJTIybG9jYWxob3N0JTIyJTJDJTIwcG9ydCUzRCUyMjkyMDAlMjIlMkMlMjBlc19pbmRleF9uYW1lJTNEJTIyaGZfc3F1YWRfdmFsX2NvbnRleHQlMjIpJTBBc3F1YWQuZ2V0X2luZGV4KCUyMmNvbnRleHQlMjIpLmVzX2luZGV4X25hbWU=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>squad = load_dataset(<span class="hljs-string">&#x27;rajpurkar/squad&#x27;</span>, split=<span class="hljs-string">&#x27;validation&#x27;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>squad.add_elasticsearch_index(<span class="hljs-string">&quot;context&quot;</span>, host=<span class="hljs-string">&quot;localhost&quot;</span>, port=<span class="hljs-string">&quot;9200&quot;</span>, es_index_name=<span class="hljs-string">&quot;hf_squad_val_context&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>squad.get_index(<span class="hljs-string">&quot;context&quot;</span>).es_index_name
hf_squad_val_context`,wrap:!1}}),
    A=new g({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBc3F1YWQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ3JhanB1cmthciUyRnNxdWFkJyUyQyUyMHNwbGl0JTNEJ3ZhbGlkYXRpb24nKSUwQXNxdWFkLmxvYWRfZWxhc3RpY3NlYXJjaF9pbmRleCglMjJjb250ZXh0JTIyJTJDJTIwaG9zdCUzRCUyMmxvY2FsaG9zdCUyMiUyQyUyMHBvcnQlM0QlMjI5MjAwJTIyJTJDJTIwZXNfaW5kZXhfbmFtZSUzRCUyMmhmX3NxdWFkX3ZhbF9jb250ZXh0JTIyKSUwQXF1ZXJ5JTIwJTNEJTIwJTIybWFjaGluZSUyMiUwQXNjb3JlcyUyQyUyMHJldHJpZXZlZF9leGFtcGxlcyUyMCUzRCUyMHNxdWFkLmdldF9uZWFyZXN0X2V4YW1wbGVzKCUyMmNvbnRleHQlMjIlMkMlMjBxdWVyeSUyQyUyMGslM0QxMCk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>squad = load_dataset(<span class="hljs-string">&#x27;rajpurkar/squad&#x27;</span>, split=<span class="hljs-string">&#x27;validation&#x27;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>squad.load_elasticsearch_index(<span class="hljs-string">&quot;context&quot;</span>, host=<span class="hljs-string">&quot;localhost&quot;</span>, port=<span class="hljs-string">&quot;9200&quot;</span>, es_index_name=<span class="hljs-string">&quot;hf_squad_val_context&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>query = <span class="hljs-string">&quot;machine&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>scores, retrieved_examples = squad.get_nearest_examples(<span class="hljs-string">&quot;context&quot;</span>, query, k=<span class="hljs-number">10</span>)`,wrap:!1}}),
    K=new g({props:{code:"aW1wb3J0JTIwZWxhc3RpY3NlYXJjaCUyMGFzJTIwZXMlMEFpbXBvcnQlMjBlbGFzdGljc2VhcmNoLmhlbHBlcnMlMEFmcm9tJTIwZWxhc3RpY3NlYXJjaCUyMGltcG9ydCUyMEVsYXN0aWNzZWFyY2glMEFlc19jbGllbnQlMjAlM0QlMjBFbGFzdGljc2VhcmNoKCU1QiU3QiUyMmhvc3QlMjIlM0ElMjAlMjJsb2NhbGhvc3QlMjIlMkMlMjAlMjJwb3J0JTIyJTNBJTIwJTIyOTIwMCUyMiU3RCU1RCklMjAlMjAlMjMlMjBkZWZhdWx0JTIwY2xpZW50JTBBZXNfY29uZmlnJTIwJTNEJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIyc2V0dGluZ3MlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJudW1iZXJfb2Zfc2hhcmRzJTIyJTNBJTIwMSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmFuYWx5c2lzJTIyJTNBJTIwJTdCJTIyYW5hbHl6ZXIlMjIlM0ElMjAlN0IlMjJzdG9wX3N0YW5kYXJkJTIyJTNBJTIwJTdCJTIydHlwZSUyMiUzQSUyMCUyMnN0YW5kYXJkJTIyJTJDJTIwJTIyJTIwc3RvcHdvcmRzJTIyJTNBJTIwJTIyX2VuZ2xpc2hfJTIyJTdEJTdEJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIybWFwcGluZ3MlMjIlM0ElMjAlN0IlMjJwcm9wZXJ0aWVzJTIyJTNBJTIwJTdCJTIydGV4dCUyMiUzQSUyMCU3QiUyMnR5cGUlMjIlM0ElMjAlMjJ0ZXh0JTIyJTJDJTIwJTIyYW5hbHl6ZXIlMjIlM0ElMjAlMjJzdGFuZGFyZCUyMiUyQyUyMCUyMnNpbWlsYXJpdHklMjIlM0ElMjAlMjJCTTI1JTIyJTdEJTdEJTdEJTJDJTBBJTdEJTIwJTIwJTIzJTIwZGVmYXVsdCUyMGNvbmZpZyUwQWVzX2luZGV4X25hbWUlMjAlM0QlMjAlMjJoZl9zcXVhZF9jb250ZXh0JTIyJTIwJTIwJTIzJTIwbmFtZSUyMG9mJTIwdGhlJTIwaW5kZXglMjBpbiUyMEVsYXN0aWNzZWFyY2glMEFzcXVhZC5hZGRfZWxhc3RpY3NlYXJjaF9pbmRleCglMjJjb250ZXh0JTIyJTJDJTIwZXNfY2xpZW50JTNEZXNfY2xpZW50JTJDJTIwZXNfY29uZmlnJTNEZXNfY29uZmlnJTJDJTIwZXNfaW5kZXhfbmFtZSUzRGVzX2luZGV4X25hbWUp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> elasticsearch <span class="hljs-keyword">as</span> es
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> elasticsearch.helpers
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> elasticsearch <span class="hljs-keyword">import</span> Elasticsearch
<span class="hljs-meta">&gt;&gt;&gt; </span>es_client = Elasticsearch([{<span class="hljs-string">&quot;host&quot;</span>: <span class="hljs-string">&quot;localhost&quot;</span>, <span class="hljs-string">&quot;port&quot;</span>: <span class="hljs-string">&quot;9200&quot;</span>}]) <span class="hljs-comment"># default client</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>es_config = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;settings&quot;</span>: {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;number_of_shards&quot;</span>: <span class="hljs-number">1</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;analysis&quot;</span>: {<span class="hljs-string">&quot;analyzer&quot;</span>: {<span class="hljs-string">&quot;stop_standard&quot;</span>: {<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;standard&quot;</span>, <span class="hljs-string">&quot; stopwords&quot;</span>: <span class="hljs-string">&quot;_english_&quot;</span>}}},
<span class="hljs-meta">... </span> },
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;mappings&quot;</span>: {<span class="hljs-string">&quot;properties&quot;</span>: {<span class="hljs-string">&quot;text&quot;</span>: {<span class="hljs-string">&quot;type&quot;</span>: <span class="hljs-string">&quot;text&quot;</span>, <span class="hljs-string">&quot;analyzer&quot;</span>: <span class="hljs-string">&quot;standard&quot;</span>, <span class="hljs-string">&quot;similarity&quot;</span>: <span class="hljs-string">&quot;BM25&quot;</span>}}},
<span class="hljs-meta">... </span>} <span class="hljs-comment"># default config</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>es_index_name = <span class="hljs-string">&quot;hf_squad_context&quot;</span> <span class="hljs-comment"># name of the index in Elasticsearch</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>squad.add_elasticsearch_index(<span class="hljs-string">&quot;context&quot;</span>, es_client=es_client, es_config=es_config, es_index_name=es_index_name)`,wrap:!1}}),
    O=new Jt({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/faiss_es.mdx"}}),
    {
      // c: builds every node from scratch and assigns its static content.
      c(){y=p("meta"),as=l(),ss=p("p"),es=l(),c(U.$$.fragment),ls=l(),c(X.$$.fragment),ns=l(),k=p("p"),k.innerHTML=Es,ps=l(),R=p("p"),R.textContent=Ys,is=l(),c(W.$$.fragment),cs=l(),$=p("p"),$.innerHTML=Bs,rs=l(),q=p("ol"),q.innerHTML=Hs,os=l(),c(F.$$.fragment),ms=l(),M=p("ol"),M.innerHTML=Ls,ds=l(),c(N.$$.fragment),hs=l(),j=p("ol"),j.innerHTML=Ds,us=l(),c(C.$$.fragment),gs=l(),f=p("ol"),f.innerHTML=Ss,Js=l(),c(G.$$.fragment),ys=l(),T=p("ol"),T.innerHTML=As,Ms=l(),c(v.$$.fragment),js=l(),w=p("ol"),w.innerHTML=Ps,fs=l(),c(V.$$.fragment),Ts=l(),_=p("ol"),_.innerHTML=Ks,ws=l(),c(z.$$.fragment),_s=l(),c(Q.$$.fragment),bs=l(),E=p("p"),E.textContent=Os,xs=l(),Y=p("p"),Y.innerHTML=st,Zs=l(),B=p("ol"),B.innerHTML=tt,Is=l(),c(H.$$.fragment),Us=l(),b=p("ol"),b.innerHTML=at,Xs=l(),c(L.$$.fragment),ks=l(),x=p("ol"),x.innerHTML=et,Rs=l(),c(D.$$.fragment),Ws=l(),Z=p("ol"),Z.innerHTML=lt,$s=l(),c(S.$$.fragment),qs=l(),I=p("ol"),I.innerHTML=nt,Fs=l(),c(A.$$.fragment),Ns=l(),P=p("p"),P.textContent=pt,Cs=l(),c(K.$$.fragment),Gs=l(),c(O.$$.fragment),vs=l(),ts=p("p"),this.h()},
      // l: same node sequence, but obtained from the existing nodes in `s`;
      // content is only (re)assigned when the node's `svelte-…` hash differs
      // from the expected value.
      l(s){
        const t=ht("svelte-u9bgzb",document.head);
        y=i(t,"META",{name:!0,content:!0}),t.forEach(a),as=n(s),ss=i(s,"P",{}),it(ss).forEach(a),es=n(s),r(U.$$.fragment,s),ls=n(s),r(X.$$.fragment,s),ns=n(s),k=i(s,"P",{"data-svelte-h":!0}),o(k)!=="svelte-1n7ea75"&&(k.innerHTML=Es),ps=n(s),R=i(s,"P",{"data-svelte-h":!0}),o(R)!=="svelte-hdwpul"&&(R.textContent=Ys),is=n(s),r(W.$$.fragment,s),cs=n(s),$=i(s,"P",{"data-svelte-h":!0}),o($)!=="svelte-se9cy2"&&($.innerHTML=Bs),rs=n(s),q=i(s,"OL",{"data-svelte-h":!0}),o(q)!=="svelte-x1ghuo"&&(q.innerHTML=Hs),os=n(s),r(F.$$.fragment,s),ms=n(s),M=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(M)!=="svelte-1teerx9"&&(M.innerHTML=Ls),ds=n(s),r(N.$$.fragment,s),hs=n(s),j=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(j)!=="svelte-ym625v"&&(j.innerHTML=Ds),us=n(s),r(C.$$.fragment,s),gs=n(s),f=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(f)!=="svelte-1ast6iv"&&(f.innerHTML=Ss),Js=n(s),r(G.$$.fragment,s),ys=n(s),T=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(T)!=="svelte-64d8et"&&(T.innerHTML=As),Ms=n(s),r(v.$$.fragment,s),js=n(s),w=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(w)!=="svelte-10f8mcs"&&(w.innerHTML=Ps),fs=n(s),r(V.$$.fragment,s),Ts=n(s),_=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(_)!=="svelte-pmavtd"&&(_.innerHTML=Ks),ws=n(s),r(z.$$.fragment,s),_s=n(s),r(Q.$$.fragment,s),bs=n(s),E=i(s,"P",{"data-svelte-h":!0}),o(E)!=="svelte-8ul8ma"&&(E.textContent=Os),xs=n(s),Y=i(s,"P",{"data-svelte-h":!0}),o(Y)!=="svelte-195ztu1"&&(Y.innerHTML=st),Zs=n(s),B=i(s,"OL",{"data-svelte-h":!0}),o(B)!=="svelte-3fd7ny"&&(B.innerHTML=tt),Is=n(s),r(H.$$.fragment,s),Us=n(s),b=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(b)!=="svelte-1v2mfzu"&&(b.innerHTML=at),Xs=n(s),r(L.$$.fragment,s),ks=n(s),x=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(x)!=="svelte-ez44t5"&&(x.innerHTML=et),Rs=n(s),r(D.$$.fragment,s),Ws=n(s),Z=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(Z)!=="svelte-1tchn11"&&(Z.innerHTML=lt),$s=n(s),r(S.$$.fragment,s),qs=n(s),I=i(s,"OL",{start:!0,"data-svelte-h":!0}),o(I)!=="svelte-mhxyep"&&(I.innerHTML=nt),Fs=n(s),r(A.$$.fragment,s),Ns=n(s),P=i(s,"P",{"data-svelte-h":!0}),o(P)!=="svelte-1ajeg13"&&(P.textContent=pt),Cs=n(s),r(K.$$.fragment,s),Gs=n(s),r(O.$$.fragment,s),vs=n(s),ts=i(s,"P",{}),it(ts).forEach(a),this.h()
      },
      // h: sets the metadata attributes on `y` and the `start` numbers of the
      // continuation <ol> lists (each step is its own one-item list).
      h(){J(y,"name","hf:doc:metadata"),J(y,"content",Mt),J(M,"start","2"),J(j,"start","3"),J(f,"start","4"),J(T,"start","5"),J(w,"start","6"),J(_,"start","7"),J(b,"start","2"),J(x,"start","3"),J(Z,"start","4"),J(I,"start","5")},
      // m: attaches `y` to document.head and everything else to target `s`
      // (anchor `t`) in document order, then marks the fragment as shown.
      m(s,t){ut(document.head,y),e(s,as,t),e(s,ss,t),e(s,es,t),m(U,s,t),e(s,ls,t),m(X,s,t),e(s,ns,t),e(s,k,t),e(s,ps,t),e(s,R,t),e(s,is,t),m(W,s,t),e(s,cs,t),e(s,$,t),e(s,rs,t),e(s,q,t),e(s,os,t),m(F,s,t),e(s,ms,t),e(s,M,t),e(s,ds,t),m(N,s,t),e(s,hs,t),e(s,j,t),e(s,us,t),m(C,s,t),e(s,gs,t),e(s,f,t),e(s,Js,t),m(G,s,t),e(s,ys,t),e(s,T,t),e(s,Ms,t),m(v,s,t),e(s,js,t),e(s,w,t),e(s,fs,t),m(V,s,t),e(s,Ts,t),e(s,_,t),e(s,ws,t),m(z,s,t),e(s,_s,t),m(Q,s,t),e(s,bs,t),e(s,E,t),e(s,xs,t),e(s,Y,t),e(s,Zs,t),e(s,B,t),e(s,Is,t),m(H,s,t),e(s,Us,t),e(s,b,t),e(s,Xs,t),m(L,s,t),e(s,ks,t),e(s,x,t),e(s,Rs,t),m(D,s,t),e(s,Ws,t),e(s,Z,t),e(s,$s,t),m(S,s,t),e(s,qs,t),e(s,I,t),e(s,Fs,t),m(A,s,t),e(s,Ns,t),e(s,P,t),e(s,Cs,t),m(K,s,t),e(s,Gs,t),m(O,s,t),e(s,vs,t),e(s,ts,t),Vs=!0},
      // p: the imported `rt` is used as the update hook; nothing here depends
      // on changing state.
      p:rt,
      // i / o: forward to every child component's fragment, guarded / tracked
      // by the `Vs` flag.
      i(s){Vs||(d(U.$$.fragment,s),d(X.$$.fragment,s),d(W.$$.fragment,s),d(F.$$.fragment,s),d(N.$$.fragment,s),d(C.$$.fragment,s),d(G.$$.fragment,s),d(v.$$.fragment,s),d(V.$$.fragment,s),d(z.$$.fragment,s),d(Q.$$.fragment,s),d(H.$$.fragment,s),d(L.$$.fragment,s),d(D.$$.fragment,s),d(S.$$.fragment,s),d(A.$$.fragment,s),d(K.$$.fragment,s),d(O.$$.fragment,s),Vs=!0)},
      o(s){h(U.$$.fragment,s),h(X.$$.fragment,s),h(W.$$.fragment,s),h(F.$$.fragment,s),h(N.$$.fragment,s),h(C.$$.fragment,s),h(G.$$.fragment,s),h(v.$$.fragment,s),h(V.$$.fragment,s),h(z.$$.fragment,s),h(Q.$$.fragment,s),h(H.$$.fragment,s),h(L.$$.fragment,s),h(D.$$.fragment,s),h(S.$$.fragment,s),h(A.$$.fragment,s),h(K.$$.fragment,s),h(O.$$.fragment,s),Vs=!1},
      // d: when `s` is truthy, removes the body-level nodes; always removes
      // the head <meta> and tears down every child component.
      d(s){s&&(a(as),a(ss),a(es),a(ls),a(ns),a(k),a(ps),a(R),a(is),a(cs),a($),a(rs),a(q),a(os),a(ms),a(M),a(ds),a(hs),a(j),a(us),a(gs),a(f),a(Js),a(ys),a(T),a(Ms),a(js),a(w),a(fs),a(Ts),a(_),a(ws),a(_s),a(bs),a(E),a(xs),a(Y),a(Zs),a(B),a(Is),a(Us),a(b),a(Xs),a(ks),a(x),a(Rs),a(Ws),a(Z),a($s),a(qs),a(I),a(Fs),a(Ns),a(P),a(Cs),a(Gs),a(vs),a(ts)),a(y),u(U,s),u(X,s),u(W,s),u(F,s),u(N,s),u(C,s),u(G,s),u(v,s),u(V,s),u(z,s),u(Q,s),u(H,s),u(L,s),u(D,s),u(S,s),u(A,s),u(K,s),u(O,s)}
    }
}

// JSON table of contents written to the `hf:doc:metadata` <meta> tag in h().
const Mt='{"title":"Search index","local":"search-index","sections":[{"title":"FAISS","local":"faiss","sections":[],"depth":2},{"title":"Elasticsearch","local":"elasticsearch","sections":[],"depth":2}],"depth":1}';

/**
 * Instance initialiser: registers a callback through `ot` that reads the
 * `fw` query parameter (the value is discarded) and returns an empty array.
 */
function jt(Qs){return ot(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

/** Page component: hands `jt`, `yt` and `ct` to the imported initialiser `dt`. */
class bt extends mt{constructor(y){super(),dt(this,y,jt,yt,ct,{})}}

export{bt as component};

Xet Storage Details

Size:
25.7 kB
·
Xet hash:
64a0904dc2fa3c05ce3770ae57dba36bc86ffcac4499d77d3aefb8e9756436aa

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.