Buckets:
| import{s as es,o as ls,n as Kt}from"../chunks/scheduler.37c15a92.js";import{S as ts,i as ss,g as h,s as o,r as u,A as as,h as f,f as t,c as r,j as Lt,u as U,x as j,k as Pe,l as ct,y as ns,a as s,v as y,t as c,b as at,d,w as b,p as nt}from"../chunks/index.2bf4358c.js";import{T as Pt}from"../chunks/Tip.363c041f.js";import{Y as is}from"../chunks/Youtube.1e50a667.js";import{C as T}from"../chunks/CodeBlock.4e987730.js";import{C as Ot}from"../chunks/CourseFloatingBanner.9ff4c771.js";import{F as os}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Ke,E as rs}from"../chunks/getInferenceSnippets.1837c472.js";function Ms(C){let a,p;return a=new Ot({props:{chapter:5,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/it/chapter5/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/it/chapter5/section6_tf.ipynb"}]}}),{c(){u(a.$$.fragment)},l(n){U(a.$$.fragment,n)},m(n,w){y(a,n,w),p=!0},i(n){p||(d(a.$$.fragment,n),p=!0)},o(n){c(a.$$.fragment,n),p=!1},d(n){b(a,n)}}}function cs(C){let a,p;return a=new Ot({props:{chapter:5,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/it/chapter5/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/it/chapter5/section6_pt.ipynb"}]}}),{c(){u(a.$$.fragment)},l(n){U(a.$$.fragment,n)},m(n,w){y(a,n,w),p=!0},i(n){p||(d(a.$$.fragment,n),p=!0)},o(n){c(a.$$.fragment,n),p=!1},d(n){b(a,n)}}}function ds(C){let a,p='✏️ <strong>Prova tu!</strong> Prova ad utilizzare <code>Dataset.map()</code> per far esplodere la colonna <code>commenti</code> di <code>issues_dataset</code> <em>senza</em> utilizzare Pandas. 
È un po’ difficile: potrebbe tornarti utile la sezione <a href="https://huggingface.co/docs/datasets/about_map_batch#batch-mapping" rel="nofollow">“Batch mapping”</a> della documentazione di 🤗 Datasets.';return{c(){a=h("p"),a.innerHTML=p},l(n){a=f(n,"P",{"data-svelte-h":!0}),j(a)!=="svelte-123safh"&&(a.innerHTML=p)},m(n,w){s(n,a,w)},p:Kt,d(n){n&&t(a)}}}function ps(C){let a,p,n,w="Nota che abbiamo impostato <code>from_pt=True</code> come argomento del metodo <code>from_pretrained()</code>. Questo perchè il checkpoint <code>multi-qa-mpnet-base-dot-v1</code> ha solo pesi PyTorch, quindi impostare <code>from_pt=True</code> li convertirà automaticamente in formato TensorFlow. Come puoi vedere, è molto facile passare dall’uno all’altro su 🤗 Transformers!",M;return a=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBURkF1dG9Nb2RlbCUwQSUwQW1vZGVsX2NrcHQlMjAlM0QlMjAlMjJzZW50ZW5jZS10cmFuc2Zvcm1lcnMlMkZtdWx0aS1xYS1tcG5ldC1iYXNlLWRvdC12MSUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2NrcHQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbC5mcm9tX3ByZXRyYWluZWQobW9kZWxfY2twdCUyQyUyMGZyb21fcHQlM0RUcnVlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModel | |
| model_ckpt = <span class="hljs-string">"sentence-transformers/multi-qa-mpnet-base-dot-v1"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_ckpt) | |
| model = TFAutoModel.from_pretrained(model_ckpt, from_pt=<span class="hljs-literal">True</span>)`,wrap:!1}}),{c(){u(a.$$.fragment),p=o(),n=h("p"),n.innerHTML=w},l(m){U(a.$$.fragment,m),p=r(m),n=f(m,"P",{"data-svelte-h":!0}),j(n)!=="svelte-yr7k0b"&&(n.innerHTML=w)},m(m,G){y(a,m,G),s(m,p,G),s(m,n,G),M=!0},i(m){M||(d(a.$$.fragment,m),M=!0)},o(m){c(a.$$.fragment,m),M=!1},d(m){m&&(t(p),t(n)),b(a,m)}}}function ms(C){let a,p,n,w="Per accelerare il processo di embedding, è bene usare la GPU per il modello e gli input, quindi:",M,m,G;return a=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBBdXRvTW9kZWwlMEElMEFtb2RlbF9ja3B0JTIwJTNEJTIwJTIyc2VudGVuY2UtdHJhbnNmb3JtZXJzJTJGbXVsdGktcWEtbXBuZXQtYmFzZS1kb3QtdjElMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChtb2RlbF9ja3B0KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsLmZyb21fcHJldHJhaW5lZChtb2RlbF9ja3B0KQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModel | |
| model_ckpt = <span class="hljs-string">"sentence-transformers/multi-qa-mpnet-base-dot-v1"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_ckpt) | |
| model = AutoModel.from_pretrained(model_ckpt)`,wrap:!1}}),m=new T({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFkZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMEFtb2RlbC50byhkZXZpY2Up",highlighted:`<span class="hljs-keyword">import</span> torch | |
| device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| model.to(device)`,wrap:!1}}),{c(){u(a.$$.fragment),p=o(),n=h("p"),n.textContent=w,M=o(),u(m.$$.fragment)},l(J){U(a.$$.fragment,J),p=r(J),n=f(J,"P",{"data-svelte-h":!0}),j(n)!=="svelte-kh1hef"&&(n.textContent=w),M=r(J),U(m.$$.fragment,J)},m(J,Q){y(a,J,Q),s(J,p,Q),s(J,n,Q),s(J,M,Q),y(m,J,Q),G=!0},i(J){G||(d(a.$$.fragment,J),d(m.$$.fragment,J),G=!0)},o(J){c(a.$$.fragment,J),c(m.$$.fragment,J),G=!1},d(J){J&&(t(p),t(n),t(M)),b(a,J),b(m,J)}}}function us(C){let a,p,n,w="Possiamo testare la funzione dandole in input la prima voce testuale del nostro corpus e studiando le dimensioni dell’output:",M,m,G,J,Q,Z,R="Bene, abbiamo convertito la prima voce del nostro corpus in un vettore a 768 dimensioni! Possiamo usare <code>Dataset.map()</code> per applicare la nostra funzione <code>get_embedding()</code> a ogni riga del nostro corpus, quindi creiamo una nuova colonna <code>embedding</code> così:",k,I,$;return a=new T({props:{code:"ZGVmJTIwZ2V0X2VtYmVkZGluZ3ModGV4dF9saXN0KSUzQSUwQSUyMCUyMCUyMCUyMGVuY29kZWRfaW5wdXQlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdGV4dF9saXN0JTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwZW5jb2RlZF9pbnB1dCUyMCUzRCUyMCU3QmslM0ElMjB2JTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGVuY29kZWRfaW5wdXQuaXRlbXMoKSU3RCUwQSUyMCUyMCUyMCUyMG1vZGVsX291dHB1dCUyMCUzRCUyMG1vZGVsKCoqZW5jb2RlZF9pbnB1dCklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBjbHNfcG9vbGluZyhtb2RlbF9vdXRwdXQp",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_embeddings</span>(<span class="hljs-params">text_list</span>): | |
| encoded_input = tokenizer( | |
| text_list, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span> | |
| ) | |
| encoded_input = {k: v <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> encoded_input.items()} | |
| model_output = model(**encoded_input) | |
| <span class="hljs-keyword">return</span> cls_pooling(model_output)`,wrap:!1}}),m=new T({props:{code:"ZW1iZWRkaW5nJTIwJTNEJTIwZ2V0X2VtYmVkZGluZ3MoY29tbWVudHNfZGF0YXNldCU1QiUyMnRleHQlMjIlNUQlNUIwJTVEKSUwQWVtYmVkZGluZy5zaGFwZQ==",highlighted:`embedding = get_embeddings(comments_dataset[<span class="hljs-string">"text"</span>][<span class="hljs-number">0</span>]) | |
| embedding.shape`,wrap:!1}}),J=new T({props:{code:"VGVuc29yU2hhcGUoJTVCMSUyQyUyMDc2OCU1RCk=",highlighted:'TensorShape([<span class="hljs-number">1</span>, <span class="hljs-number">768</span>])',wrap:!1}}),I=new T({props:{code:"ZW1iZWRkaW5nc19kYXRhc2V0JTIwJTNEJTIwY29tbWVudHNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmVtYmVkZGluZ3MlMjIlM0ElMjBnZXRfZW1iZWRkaW5ncyh4JTVCJTIydGV4dCUyMiU1RCkubnVtcHkoKSU1QjAlNUQlN0QlMEEp",highlighted:`embeddings_dataset = comments_dataset.<span class="hljs-built_in">map</span>( | |
| <span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">"embeddings"</span>: get_embeddings(x[<span class="hljs-string">"text"</span>]).numpy()[<span class="hljs-number">0</span>]} | |
| )`,wrap:!1}}),{c(){u(a.$$.fragment),p=o(),n=h("p"),n.textContent=w,M=o(),u(m.$$.fragment),G=o(),u(J.$$.fragment),Q=o(),Z=h("p"),Z.innerHTML=R,k=o(),u(I.$$.fragment)},l(i){U(a.$$.fragment,i),p=r(i),n=f(i,"P",{"data-svelte-h":!0}),j(n)!=="svelte-1cfq1o3"&&(n.textContent=w),M=r(i),U(m.$$.fragment,i),G=r(i),U(J.$$.fragment,i),Q=r(i),Z=f(i,"P",{"data-svelte-h":!0}),j(Z)!=="svelte-ayfaqa"&&(Z.innerHTML=R),k=r(i),U(I.$$.fragment,i)},m(i,g){y(a,i,g),s(i,p,g),s(i,n,g),s(i,M,g),y(m,i,g),s(i,G,g),y(J,i,g),s(i,Q,g),s(i,Z,g),s(i,k,g),y(I,i,g),$=!0},i(i){$||(d(a.$$.fragment,i),d(m.$$.fragment,i),d(J.$$.fragment,i),d(I.$$.fragment,i),$=!0)},o(i){c(a.$$.fragment,i),c(m.$$.fragment,i),c(J.$$.fragment,i),c(I.$$.fragment,i),$=!1},d(i){i&&(t(p),t(n),t(M),t(G),t(Q),t(Z),t(k)),b(a,i),b(m,i),b(J,i),b(I,i)}}}function Us(C){let a,p,n,w="Possiamo testare la funzione sul primo testo nel nostro corpus, e ispezionandone le dimensioni dell’ouput:",M,m,G,J,Q,Z,R="Bene, abbiamo convertito la prima voce del nostro corpus in un vettore a 768 dimensioni! Possiamo usare <code>Dataset.map()</code> per applicare la nostra funzione <code>get_embedding()</code> a ogni riga del nostro corpus, quindi creiamo una nuova colonna <code>embedding</code> così:",k,I,$;return a=new T({props:{code:"ZGVmJTIwZ2V0X2VtYmVkZGluZ3ModGV4dF9saXN0KSUzQSUwQSUyMCUyMCUyMCUyMGVuY29kZWRfaW5wdXQlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdGV4dF9saXN0JTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwZW5jb2RlZF9pbnB1dCUyMCUzRCUyMCU3QmslM0ElMjB2LnRvKGRldmljZSklMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwZW5jb2RlZF9pbnB1dC5pdGVtcygpJTdEJTBBJTIwJTIwJTIwJTIwbW9kZWxfb3V0cHV0JTIwJTNEJTIwbW9kZWwoKiplbmNvZGVkX2lucHV0KSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGNsc19wb29saW5nKG1vZGVsX291dHB1dCk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_embeddings</span>(<span class="hljs-params">text_list</span>): | |
| encoded_input = tokenizer( | |
| text_list, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span> | |
| ) | |
| encoded_input = {k: v.to(device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> encoded_input.items()} | |
| model_output = model(**encoded_input) | |
| <span class="hljs-keyword">return</span> cls_pooling(model_output)`,wrap:!1}}),m=new T({props:{code:"ZW1iZWRkaW5nJTIwJTNEJTIwZ2V0X2VtYmVkZGluZ3MoY29tbWVudHNfZGF0YXNldCU1QiUyMnRleHQlMjIlNUQlNUIwJTVEKSUwQWVtYmVkZGluZy5zaGFwZQ==",highlighted:`embedding = get_embeddings(comments_dataset[<span class="hljs-string">"text"</span>][<span class="hljs-number">0</span>]) | |
| embedding.shape`,wrap:!1}}),J=new T({props:{code:"dG9yY2guU2l6ZSglNUIxJTJDJTIwNzY4JTVEKQ==",highlighted:'torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">768</span>])',wrap:!1}}),I=new T({props:{code:"ZW1iZWRkaW5nc19kYXRhc2V0JTIwJTNEJTIwY29tbWVudHNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmVtYmVkZGluZ3MlMjIlM0ElMjBnZXRfZW1iZWRkaW5ncyh4JTVCJTIydGV4dCUyMiU1RCkuZGV0YWNoKCkuY3B1KCkubnVtcHkoKSU1QjAlNUQlN0QlMEEp",highlighted:`embeddings_dataset = comments_dataset.<span class="hljs-built_in">map</span>( | |
| <span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">"embeddings"</span>: get_embeddings(x[<span class="hljs-string">"text"</span>]).detach().cpu().numpy()[<span class="hljs-number">0</span>]} | |
| )`,wrap:!1}}),{c(){u(a.$$.fragment),p=o(),n=h("p"),n.textContent=w,M=o(),u(m.$$.fragment),G=o(),u(J.$$.fragment),Q=o(),Z=h("p"),Z.innerHTML=R,k=o(),u(I.$$.fragment)},l(i){U(a.$$.fragment,i),p=r(i),n=f(i,"P",{"data-svelte-h":!0}),j(n)!=="svelte-x4eorg"&&(n.textContent=w),M=r(i),U(m.$$.fragment,i),G=r(i),U(J.$$.fragment,i),Q=r(i),Z=f(i,"P",{"data-svelte-h":!0}),j(Z)!=="svelte-ayfaqa"&&(Z.innerHTML=R),k=r(i),U(I.$$.fragment,i)},m(i,g){y(a,i,g),s(i,p,g),s(i,n,g),s(i,M,g),y(m,i,g),s(i,G,g),y(J,i,g),s(i,Q,g),s(i,Z,g),s(i,k,g),y(I,i,g),$=!0},i(i){$||(d(a.$$.fragment,i),d(m.$$.fragment,i),d(J.$$.fragment,i),d(I.$$.fragment,i),$=!0)},o(i){c(a.$$.fragment,i),c(m.$$.fragment,i),c(J.$$.fragment,i),c(I.$$.fragment,i),$=!1},d(i){i&&(t(p),t(n),t(M),t(G),t(Q),t(Z),t(k)),b(a,i),b(m,i),b(J,i),b(I,i)}}}function ys(C){let a,p,n,w;return a=new T({props:{code:"cXVlc3Rpb24lMjAlM0QlMjAlMjJIb3clMjBjYW4lMjBJJTIwbG9hZCUyMGElMjBkYXRhc2V0JTIwb2ZmbGluZSUzRiUyMiUwQXF1ZXN0aW9uX2VtYmVkZGluZyUyMCUzRCUyMGdldF9lbWJlZGRpbmdzKCU1QnF1ZXN0aW9uJTVEKS5udW1weSgpJTBBcXVlc3Rpb25fZW1iZWRkaW5nLnNoYXBl",highlighted:`question = <span class="hljs-string">"How can I load a dataset offline?"</span> | |
| question_embedding = get_embeddings([question]).numpy() | |
| question_embedding.shape`,wrap:!1}}),n=new T({props:{code:"KDElMkMlMjA3Njgp",highlighted:'(<span class="hljs-number">1</span>, <span class="hljs-number">768</span>)',wrap:!1}}),{c(){u(a.$$.fragment),p=o(),u(n.$$.fragment)},l(M){U(a.$$.fragment,M),p=r(M),U(n.$$.fragment,M)},m(M,m){y(a,M,m),s(M,p,m),y(n,M,m),w=!0},i(M){w||(d(a.$$.fragment,M),d(n.$$.fragment,M),w=!0)},o(M){c(a.$$.fragment,M),c(n.$$.fragment,M),w=!1},d(M){M&&t(p),b(a,M),b(n,M)}}}function bs(C){let a,p,n,w;return a=new T({props:{code:"cXVlc3Rpb24lMjAlM0QlMjAlMjJIb3clMjBjYW4lMjBJJTIwbG9hZCUyMGElMjBkYXRhc2V0JTIwb2ZmbGluZSUzRiUyMiUwQXF1ZXN0aW9uX2VtYmVkZGluZyUyMCUzRCUyMGdldF9lbWJlZGRpbmdzKCU1QnF1ZXN0aW9uJTVEKS5jcHUoKS5kZXRhY2goKS5udW1weSgpJTBBcXVlc3Rpb25fZW1iZWRkaW5nLnNoYXBl",highlighted:`question = <span class="hljs-string">"How can I load a dataset offline?"</span> | |
| question_embedding = get_embeddings([question]).cpu().detach().numpy() | |
| question_embedding.shape`,wrap:!1}}),n=new T({props:{code:"dG9yY2guU2l6ZSglNUIxJTJDJTIwNzY4JTVEKQ==",highlighted:'torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">768</span>])',wrap:!1}}),{c(){u(a.$$.fragment),p=o(),u(n.$$.fragment)},l(M){U(a.$$.fragment,M),p=r(M),U(n.$$.fragment,M)},m(M,m){y(a,M,m),s(M,p,m),y(n,M,m),w=!0},i(M){w||(d(a.$$.fragment,M),d(n.$$.fragment,M),w=!0)},o(M){c(a.$$.fragment,M),c(n.$$.fragment,M),w=!1},d(M){M&&t(p),b(a,M),b(n,M)}}}function Js(C){let a,p="✏️ <strong>Prova tu!</strong> Crea la tua query e prova a trovare una risposta tra i documenti raccolti. Potresti aver bisogno di aumentare il parametro <code>k</code> in <code>Dataset.get_nearest_examples()</code> per allargare la ricerca.";return{c(){a=h("p"),a.innerHTML=p},l(n){a=f(n,"P",{"data-svelte-h":!0}),j(a)!=="svelte-xjwbls"&&(a.innerHTML=p)},m(n,w){s(n,a,w)},p:Kt,d(n){n&&t(a)}}}function hs(C){let a,p,n,w,M,m,G,J,Q,Z,R,k,I='Nella <a href="/course/chapter5/5">sezione 5</a> abbiamo creato un dataset di issue e commenti dalla repository GitHub di 🤗 Datasets. In questa sezione useremo queste informazioni per costrure un motore di ricerca semantico che ci può aiutare a trovare risposte alle nostre domande urgenti sulla libreria!',$,i,g,S,Oe,F,dt='Come abbiamo visto nel <a href="/course/chapter1">Capitolo 1</a>, i language model basati su Transformer rappresentano ogni token in un testo come un <em>vettore</em>, detto <em>embedding</em>. È possibile “mettere insieme” i diversi embedding per creare una rappresentazione vettoriale di un’intera frase, paragrafo o (in alcuni casi) documento. Questi embedding possono essere usati per trovare documenti simili in un corpus calcolandone la similarità, ad esempio usando il prodotto scalere (o altre misure di similarità) tra ogni embedding, e restituendo i documenti più simili.',el,E,pt="In questa sezione useremo gli embedding per sviluppare un motore di ricerca semantico. 
Questi motori di ricerca offrono diversi vantagig rispetto ai metodo convenzionali, basati sulla ricerca, all’interno dei documenti, delle parole chiavi presente in una query.",ll,V,mt='<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/semantic-search.svg" alt="Semantic search."/> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/semantic-search-dark.svg" alt="Semantic search."/>',tl,H,sl,A,ut="La prima cosa che dobbiamo fare è scaricare il nostro dataset di issue, quindi utilizziamo la libreria 🤗 Hub per scaricare i file usando l’URL dell’Hub Hugging Face:",al,q,nl,D,Ut='Se conseriamo l’URL iin <code>data_files</code>, possiamo caricare il dataset utilizzando il metodo introdotto nella <a href="/course/chapter5/2">sezione 2</a>:',il,L,ol,P,rl,K,yt="Qui abbiamo specificato la sezione di defaul <code>train</code> in <code>load_dataset()</code>, così che questa funzione resituisce un <code>Dataset</code> invece di un <code>DatasetDict</code>. La prima cosa da fare è filtrare le richieste di pull, poichè queste tendono a essere usate raramente come risposta alle domande degli utenti, e introdurrebbero rumore nel nostro motore di ricerca. Come dovrebbe esser enoto, possiamo usare la funzione <code>Dataset.filter()</code> per escludere questi dati dal nostro dataset. Già che ci siamo, eliminiamo anche le righe senza commenti, poiché queste non presentano nessuna risposta alle domande degli utenti:",Ml,O,cl,ee,dl,le,bt="Possiamo vedere che ci sono molte colonne nel nostro dataset, molte delle quali non servono alla costruzione del nostro motore di ricerca. Da una prospettiva di ricerca, le colonne maggiormente informative sono <code>title</code>, <code>body</code>, e <code>comments</code>, mentre <code>html_url</code> ci fornisce un link all’issue originale. 
Usiamo la funzione <code>Dataset.remove_columns()</code> per eliminare le colonne rimanenti:",pl,te,ml,se,ul,ae,Jt='Per crare i nostri embedding arricchiremo ognu commento con il titolo e il corpo dell’issue, visto che questi campi spesso includono informazioni utili sul contesto. Poiché la nostra colonna <code>comment</code> è al momento una lista di commenti per ogni issue, dobbiamo “farla esplodere” così che ogni riga consista in una tupla <code>(html_url, title, body, comment)</code>. In panda è possibile farlo utilizzando la <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.explode.html" rel="nofollow">funzione <code>Dataframe.explode()</code></a>, che crea una nuova riga per ogni elemento in una colonna in formato di lista, ripetendo i valori di tutte le altre colonne. Per vederlo in azione, prima di tutto passiamo al formato <code>DataFrame</code>:',Ul,ne,yl,ie,ht="Se diamo un’occhiata alla prima riga di questo <code>DataFrame</code>, possiamo vedere che ci sono quattro commenti associati con quest’issue:",bl,oe,Jl,re,hl,Me,ft="Quando “esplodiamo” <code>df</code>, ci aspettiamo di avere una riga per ognuno di questi commenti. 
Controlliamo se è così:",fl,ce,Tl,_,Tt='<thead><tr style="text-align: right;"><th></th> <th>html_url</th> <th>title</th> <th>comments</th> <th>body</th></tr></thead> <tbody><tr><th>0</th> <td>https://github.com/huggingface/datasets/issues/2787</td> <td>ConnectionError: Couldn't reach https://raw.githubusercontent.com</td> <td>the bug code locate in :\\r\\n if data_args.task_name is not None...</td> <td>Hello,\\r\\nI am trying to run run_glue.py and it gives me this error...</td></tr> <tr><th>1</th> <td>https://github.com/huggingface/datasets/issues/2787</td> <td>ConnectionError: Couldn't reach https://raw.githubusercontent.com</td> <td>Hi @jinec,\\r\\n\\r\\nFrom time to time we get this kind of `ConnectionError` coming from the github.com website: https://raw.githubusercontent.com...</td> <td>Hello,\\r\\nI am trying to run run_glue.py and it gives me this error...</td></tr> <tr><th>2</th> <td>https://github.com/huggingface/datasets/issues/2787</td> <td>ConnectionError: Couldn't reach https://raw.githubusercontent.com</td> <td>cannot connect,even by Web browser,please check that there is some problems。</td> <td>Hello,\\r\\nI am trying to run run_glue.py and it gives me this error...</td></tr> <tr><th>3</th> <td>https://github.com/huggingface/datasets/issues/2787</td> <td>ConnectionError: Couldn't reach https://raw.githubusercontent.com</td> <td>I can access https://raw.githubusercontent.com/huggingface/datasets/1.7.0/datasets/glue/glue.py without problem...</td> <td>Hello,\\r\\nI am trying to run run_glue.py and it gives me this error...</td></tr></tbody>',jl,de,jt="bene, possiamo vedere che le righe sono state duplicate, e che la colonna <code>comment</code> contiene i diversi comment! 
Ora che abbiamo finito con Pandas, possiamo passare velocemente a <code>Dataset</code> caricando il <code>DataFrame</code> in memoria:",wl,pe,gl,me,Ql,ue,wt="Perfetto, ora abbiamo qualche migliaio di commenti con cui lavorare!",Zl,W,Gl,Ue,gt="Ora che abbiamo un commento per riga, creiamo una nuova colonna <code>comments_length</code> che contiene il numero di parole per ogni commento:",Cl,ye,Il,be,Qt="Possiamo usare questa nuova colonna per eliminare i commenti brevi, che solitamente includono cose del tipo “cc @lewtun” o “Grazie!”, che non sono pertinenti per il nostro motore di ricerca. Non abbiamo un numero preciso da selezionare per questo filtro, ma 15 parole dovrebbero andare bene:",kl,Je,$l,he,Rl,fe,Zt="Una volta data una pulizia al nostro dataset, possiamo concatenare il titolo, la descrizione e i commenti delle issue in una nuova colonna <code>text</code>. Come al solito , scriveremo una semplice funzione che possiamo passare a <code>Dataset.map()</code>:",_l,Te,Nl,je,Gt="Siamo finalmente pronti a creare degli embedding! Diamo un’occhiata.",Bl,we,zl,ge,Ct='Abbiamo visto nel <a href="/course/chapter2">Capitolo 2</a> che possiamo ottenere i token embedding utilizando la classe <code>AutoModel</code>. Dobbiamo solo scegliere un checkpoint valido da cui caricare il modell. Per fortuna, esiste una libreria chiamata <code>sentence-transformers</code>, dedicata alla creazione di embedding. Seguendo la descrizione nella <a href="https://www.sbert.net/examples/applications/semantic-search/README.html#symmetric-vs-asymmetric-semantic-search" rel="nofollow">documentazione</a>della libreria, il nostro caso d’uso è un esempio di <em>asymmetric semantic search</em> perché abbiamo una breve query per cui vogliamo trovare risposte in un documento lungo, come ad esempio un commento a un issue. 
La <a href="https://www.sbert.net/docs/pretrained_models.html#model-overview" rel="nofollow">scheda di riepilogo dei modelli</a> nella documentazione ci indica che il checkpoint <code>multi-qa-mpnet-base-dot-v1</code> ha mostrato la performance migliore per la ricerca semantica, quindi è quello che useremo per la nostra applicazione. Caricheremo anche il tokenizzatore usando lo stesso checkpoint:',Xl,N,B,Ae,Qe,It="Come abbiamo già detto prima, vorremmo rappresentare ogni entrata nel nostro corpus di issue GitHub come un vettore singolo, per cui avremo bisogno di calcolare la media, o il “pool” dei nostri token embedding. Un metodo comune è di effettuare un <em>CLS pooling</em> sull’output del nostro modello: questa tecnica su basa sul recuperare semplicemente l’ultimo stato nascosto del token speciale <code>[CLS]</code>. La funzione seguente fa proprio questo:",xl,Ze,vl,Ge,kt="Poi, creeremo una funzione di supporto che: tokenizza una lista di documenti, inserire i tensori sulla GPU, li usa come input per il modello, e infine applica il CLS pooling agli output:",Vl,z,X,qe,Ce,$t="Node che abbiamo convertito gli embedding in array NumPy — questo perchè 🤗 Datasets ha bisogno di questo formato per indicizzare gli embedding con FAISS, che è ciò che faremo nella prossima sezione.",Wl,Ie,Yl,ke,Rt=`Ora che abbiamo un dataset di embedding, abbiamo bisogno di un modo per effettuare una ricerca. Per far ciò, useremo una struttura specialie di 🤗 Datasets | |
| chiamato <em>indice FAISS</em>. <a href="https://faiss.ai/" rel="nofollow">FAISS</a> (Facebook AI Similarity Search) è una libreria che permette di utilizzare algoritmi efficient per ricercare e raggruppare gli embedding.`,Sl,$e,_t="L’idea di base dietro FAISS è di creare un formato speciale di dati chiamato <em>indice</em> che permette di trovare quali embedding sono simili a un embedding in input. Creare un indice FAISS su 🤗 Datasets è semplice — usiamo la funzione <code>Dataset.add_faiss_index()</code> e specificare quale colonna nel nostro dataset vorremmo indicizzare:",Fl,Re,El,_e,Nt="Ora possiamo eseguire dele query su questo indice effettuando una ricerca degli elementi più vicini usando la funzione <code>Dataset.get_nearest_examples()</code>. Testiamolo creando un embedding per una domanda.",Hl,x,v,De,Ne,Bt="Proprio come con i documenti, ora abbiamo un vettore di 768 dimensioni che rappresenta la query, che possiamo confrontare con l’intero corpus per trovare gli embedding più simili:",Al,Be,ql,ze,zt="La funzione <code>Dataset.get_nearest_examples()</code> restituisce una tupla di valori che valutano la sovrapposizione tra la query e il documento, e un set corrispondente di campioni (in questo caso, le 5 corrispondenze migliori). Salviamole in un <code>pandas.DataFrame</code>, così che possiamo ordinarle facilmente:",Dl,Xe,Ll,xe,Xt="Ora possiamo iterare sulle prime righe per vedere quanto bene la nostra query corrisponde ai commenti disponibili:",Pl,ve,Kl,Ve,Ol,We,xt="Non male! 
Il nostro secondo risultato sembra soddisfare la nostra richiesta.",et,Y,lt,Ye,tt,Le,st;M=new os({props:{fw:C[0]}}),G=new Ke({props:{title:"Ricerca semantica con FAISS",local:"ricerca-semantica-con-faiss",headingTag:"h1"}});const vt=[cs,Ms],Se=[];function Vt(e,l){return e[0]==="pt"?0:1}Q=Vt(C),Z=Se[Q]=vt[Q](C),i=new is({props:{id:"OATCgQtNX2o"}}),S=new Ke({props:{title:"Usare gli embedding per la ricerca semantica",local:"usare-gli-embedding-per-la-ricerca-semantica",headingTag:"h2"}}),H=new Ke({props:{title:"Caricare e preparare il dataset",local:"caricare-e-preparare-il-dataset",headingTag:"h2"}}),q=new T({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl91cmwlMEElMEFkYXRhX2ZpbGVzJTIwJTNEJTIwaGZfaHViX3VybCglMEElMjAlMjAlMjAlMjByZXBvX2lkJTNEJTIybGV3dHVuJTJGZ2l0aHViLWlzc3VlcyUyMiUyQyUwQSUyMCUyMCUyMCUyMGZpbGVuYW1lJTNEJTIyZGF0YXNldHMtaXNzdWVzLXdpdGgtY29tbWVudHMuanNvbmwlMjIlMkMlMEElMjAlMjAlMjAlMjByZXBvX3R5cGUlM0QlMjJkYXRhc2V0JTIyJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_url | |
| data_files = hf_hub_url( | |
| repo_id=<span class="hljs-string">"lewtun/github-issues"</span>, | |
| filename=<span class="hljs-string">"datasets-issues-with-comments.jsonl"</span>, | |
| repo_type=<span class="hljs-string">"dataset"</span>, | |
| )`,wrap:!1}}),L=new T({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBaXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyanNvbiUyMiUyQyUyMGRhdGFfZmlsZXMlM0RkYXRhX2ZpbGVzJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFpc3N1ZXNfZGF0YXNldA==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| issues_dataset = load_dataset(<span class="hljs-string">"json"</span>, data_files=data_files, split=<span class="hljs-string">"train"</span>) | |
| issues_dataset`,wrap:!1}}),P=new T({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3BlcmZvcm1lZF92aWFfZ2l0aHViX2FwcCclMkMlMjAnaXNfcHVsbF9yZXF1ZXN0JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjg1NSUwQSU3RCk=",highlighted:`Dataset({ | |
| features: [<span class="hljs-string">'url'</span>, <span class="hljs-string">'repository_url'</span>, <span class="hljs-string">'labels_url'</span>, <span class="hljs-string">'comments_url'</span>, <span class="hljs-string">'events_url'</span>, <span class="hljs-string">'html_url'</span>, <span class="hljs-string">'id'</span>, <span class="hljs-string">'node_id'</span>, <span class="hljs-string">'number'</span>, <span class="hljs-string">'title'</span>, <span class="hljs-string">'user'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'state'</span>, <span class="hljs-string">'locked'</span>, <span class="hljs-string">'assignee'</span>, <span class="hljs-string">'assignees'</span>, <span class="hljs-string">'milestone'</span>, <span class="hljs-string">'comments'</span>, <span class="hljs-string">'created_at'</span>, <span class="hljs-string">'updated_at'</span>, <span class="hljs-string">'closed_at'</span>, <span class="hljs-string">'author_association'</span>, <span class="hljs-string">'active_lock_reason'</span>, <span class="hljs-string">'pull_request'</span>, <span class="hljs-string">'body'</span>, <span class="hljs-string">'performed_via_github_app'</span>, <span class="hljs-string">'is_pull_request'</span>], | |
| num_rows: <span class="hljs-number">2855</span> | |
| })`,wrap:!1}}),O=new T({props:{code:"aXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5maWx0ZXIoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCh4JTVCJTIyaXNfcHVsbF9yZXF1ZXN0JTIyJTVEJTIwJTNEJTNEJTIwRmFsc2UlMjBhbmQlMjBsZW4oeCU1QiUyMmNvbW1lbnRzJTIyJTVEKSUyMCUzRSUyMDApJTBBKSUwQWlzc3Vlc19kYXRhc2V0",highlighted:`issues_dataset = issues_dataset.<span class="hljs-built_in">filter</span>( | |
| <span class="hljs-keyword">lambda</span> x: (x[<span class="hljs-string">"is_pull_request"</span>] == <span class="hljs-literal">False</span> <span class="hljs-keyword">and</span> <span class="hljs-built_in">len</span>(x[<span class="hljs-string">"comments"</span>]) > <span class="hljs-number">0</span>) | |
| ) | |
| issues_dataset`,wrap:!1}}),ee=new T({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3BlcmZvcm1lZF92aWFfZ2l0aHViX2FwcCclMkMlMjAnaXNfcHVsbF9yZXF1ZXN0JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNzcxJTBBJTdEKQ==",highlighted:`Dataset({ | |
| features: [<span class="hljs-string">'url'</span>, <span class="hljs-string">'repository_url'</span>, <span class="hljs-string">'labels_url'</span>, <span class="hljs-string">'comments_url'</span>, <span class="hljs-string">'events_url'</span>, <span class="hljs-string">'html_url'</span>, <span class="hljs-string">'id'</span>, <span class="hljs-string">'node_id'</span>, <span class="hljs-string">'number'</span>, <span class="hljs-string">'title'</span>, <span class="hljs-string">'user'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'state'</span>, <span class="hljs-string">'locked'</span>, <span class="hljs-string">'assignee'</span>, <span class="hljs-string">'assignees'</span>, <span class="hljs-string">'milestone'</span>, <span class="hljs-string">'comments'</span>, <span class="hljs-string">'created_at'</span>, <span class="hljs-string">'updated_at'</span>, <span class="hljs-string">'closed_at'</span>, <span class="hljs-string">'author_association'</span>, <span class="hljs-string">'active_lock_reason'</span>, <span class="hljs-string">'pull_request'</span>, <span class="hljs-string">'body'</span>, <span class="hljs-string">'performed_via_github_app'</span>, <span class="hljs-string">'is_pull_request'</span>], | |
| num_rows: <span class="hljs-number">771</span> | |
| })`,wrap:!1}}),te=new T({props:{code:"Y29sdW1ucyUyMCUzRCUyMGlzc3Vlc19kYXRhc2V0LmNvbHVtbl9uYW1lcyUwQWNvbHVtbnNfdG9fa2VlcCUyMCUzRCUyMCU1QiUyMnRpdGxlJTIyJTJDJTIwJTIyYm9keSUyMiUyQyUyMCUyMmh0bWxfdXJsJTIyJTJDJTIwJTIyY29tbWVudHMlMjIlNUQlMEFjb2x1bW5zX3RvX3JlbW92ZSUyMCUzRCUyMHNldChjb2x1bW5zX3RvX2tlZXApLnN5bW1ldHJpY19kaWZmZXJlbmNlKGNvbHVtbnMpJTBBaXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5yZW1vdmVfY29sdW1ucyhjb2x1bW5zX3RvX3JlbW92ZSklMEFpc3N1ZXNfZGF0YXNldA==",highlighted:`columns = issues_dataset.column_names | |
| columns_to_keep = [<span class="hljs-string">"title"</span>, <span class="hljs-string">"body"</span>, <span class="hljs-string">"html_url"</span>, <span class="hljs-string">"comments"</span>] | |
| columns_to_remove = <span class="hljs-built_in">set</span>(columns_to_keep).symmetric_difference(columns) | |
| issues_dataset = issues_dataset.remove_columns(columns_to_remove) | |
| issues_dataset`,wrap:!1}}),se=new T({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidodG1sX3VybCclMkMlMjAndGl0bGUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdib2R5JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNzcxJTBBJTdEKQ==",highlighted:`Dataset({ | |
| features: [<span class="hljs-string">'html_url'</span>, <span class="hljs-string">'title'</span>, <span class="hljs-string">'comments'</span>, <span class="hljs-string">'body'</span>], | |
| num_rows: <span class="hljs-number">771</span> | |
| })`,wrap:!1}}),ne=new T({props:{code:"aXNzdWVzX2RhdGFzZXQuc2V0X2Zvcm1hdCglMjJwYW5kYXMlMjIpJTBBZGYlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldCU1QiUzQSU1RA==",highlighted:`issues_dataset.set_format(<span class="hljs-string">"pandas"</span>) | |
| df = issues_dataset[:]`,wrap:!1}}),oe=new T({props:{code:"ZGYlNUIlMjJjb21tZW50cyUyMiU1RCU1QjAlNUQudG9saXN0KCk=",highlighted:'df[<span class="hljs-string">"comments"</span>][<span class="hljs-number">0</span>].tolist()',wrap:!1}}),re=new T({props:{code:"JTVCJ3RoZSUyMGJ1ZyUyMGNvZGUlMjBsb2NhdGUlMjBpbiUyMCVFRiVCQyU5QSU1Q3IlNUNuJTIwJTIwJTIwJTIwaWYlMjBkYXRhX2FyZ3MudGFza19uYW1lJTIwaXMlMjBub3QlMjBOb25lJTNBJTVDciU1Q24lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBEb3dubG9hZGluZyUyMGFuZCUyMGxvYWRpbmclMjBhJTIwZGF0YXNldCUyMGZyb20lMjB0aGUlMjBodWIuJTVDciU1Q24lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBkYXRhc2V0cyUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJnbHVlJTIyJTJDJTIwZGF0YV9hcmdzLnRhc2tfbmFtZSUyQyUyMGNhY2hlX2RpciUzRG1vZGVsX2FyZ3MuY2FjaGVfZGlyKSclMkMlMEElMjAnSGklMjAlNDBqaW5lYyUyQyU1Q3IlNUNuJTVDciU1Q25Gcm9tJTIwdGltZSUyMHRvJTIwdGltZSUyMHdlJTIwZ2V0JTIwdGhpcyUyMGtpbmQlMjBvZiUyMCU2MENvbm5lY3Rpb25FcnJvciU2MCUyMGNvbWluZyUyMGZyb20lMjB0aGUlMjBnaXRodWIuY29tJTIwd2Vic2l0ZSUzQSUyMGh0dHBzJTNBJTJGJTJGcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSU1Q3IlNUNuJTVDciU1Q25Ob3JtYWxseSUyQyUyMGl0JTIwc2hvdWxkJTIwd29yayUyMGlmJTIweW91JTIwd2FpdCUyMGElMjBsaXR0bGUlMjBhbmQlMjB0aGVuJTIwcmV0cnkuJTVDciU1Q24lNUNyJTVDbkNvdWxkJTIweW91JTIwcGxlYXNlJTIwY29uZmlybSUyMGlmJTIwdGhlJTIwcHJvYmxlbSUyMHBlcnNpc3RzJTNGJyUyQyUwQSUyMCdjYW5ub3QlMjBjb25uZWN0JUVGJUJDJThDZXZlbiUyMGJ5JTIwV2ViJTIwYnJvd3NlciVFRiVCQyU4Q3BsZWFzZSUyMGNoZWNrJTIwdGhhdCUyMCUyMHRoZXJlJTIwaXMlMjBzb21lJTIwJTIwcHJvYmxlbXMlRTMlODAlODInJTJDJTBBJTIwJ0klMjBjYW4lMjBhY2Nlc3MlMjBodHRwcyUzQSUyRiUyRnJhdy5naXRodWJ1c2VyY29udGVudC5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGMS43LjAlMkZkYXRhc2V0cyUyRmdsdWUlMkZnbHVlLnB5JTIwd2l0aG91dCUyMHByb2JsZW0uLi4nJTVE",highlighted:`[<span class="hljs-string">'the bug code locate in :\\r\\n if data_args.task_name is not None:\\r\\n # Downloading and loading a dataset from the hub.\\r\\n datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir)'</span>, | |
| <span class="hljs-string">'Hi @jinec,\\r\\n\\r\\nFrom time to time we get this kind of \`ConnectionError\` coming from the github.com website: https://raw.githubusercontent.com\\r\\n\\r\\nNormally, it should work if you wait a little and then retry.\\r\\n\\r\\nCould you please confirm if the problem persists?'</span>, | |
| <span class="hljs-string">'cannot connect,even by Web browser,please check that there is some problems。'</span>, | |
| <span class="hljs-string">'I can access https://raw.githubusercontent.com/huggingface/datasets/1.7.0/datasets/glue/glue.py without problem...'</span>]`,wrap:!1}}),ce=new T({props:{code:"Y29tbWVudHNfZGYlMjAlM0QlMjBkZi5leHBsb2RlKCUyMmNvbW1lbnRzJTIyJTJDJTIwaWdub3JlX2luZGV4JTNEVHJ1ZSklMEFjb21tZW50c19kZi5oZWFkKDQp",highlighted:`comments_df = df.explode(<span class="hljs-string">"comments"</span>, ignore_index=<span class="hljs-literal">True</span>) | |
| comments_df.head(<span class="hljs-number">4</span>)`,wrap:!1}}),pe=new T({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQSUwQWNvbW1lbnRzX2RhdGFzZXQlMjAlM0QlMjBEYXRhc2V0LmZyb21fcGFuZGFzKGNvbW1lbnRzX2RmKSUwQWNvbW1lbnRzX2RhdGFzZXQ=",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| comments_dataset = Dataset.from_pandas(comments_df) | |
| comments_dataset`,wrap:!1}}),me=new T({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidodG1sX3VybCclMkMlMjAndGl0bGUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdib2R5JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjg0MiUwQSU3RCk=",highlighted:`Dataset({ | |
| features: [<span class="hljs-string">'html_url'</span>, <span class="hljs-string">'title'</span>, <span class="hljs-string">'comments'</span>, <span class="hljs-string">'body'</span>], | |
| num_rows: <span class="hljs-number">2842</span> | |
| })`,wrap:!1}}),W=new Pt({props:{$$slots:{default:[ds]},$$scope:{ctx:C}}}),ye=new T({props:{code:"Y29tbWVudHNfZGF0YXNldCUyMCUzRCUyMGNvbW1lbnRzX2RhdGFzZXQubWFwKCUwQSUyMCUyMCUyMCUyMGxhbWJkYSUyMHglM0ElMjAlN0IlMjJjb21tZW50X2xlbmd0aCUyMiUzQSUyMGxlbih4JTVCJTIyY29tbWVudHMlMjIlNUQuc3BsaXQoKSklN0QlMEEp",highlighted:`comments_dataset = comments_dataset.<span class="hljs-built_in">map</span>( | |
| <span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">"comment_length"</span>: <span class="hljs-built_in">len</span>(x[<span class="hljs-string">"comments"</span>].split())} | |
| )`,wrap:!1}}),Je=new T({props:{code:"Y29tbWVudHNfZGF0YXNldCUyMCUzRCUyMGNvbW1lbnRzX2RhdGFzZXQuZmlsdGVyKGxhbWJkYSUyMHglM0ElMjB4JTVCJTIyY29tbWVudF9sZW5ndGglMjIlNUQlMjAlM0UlMjAxNSklMEFjb21tZW50c19kYXRhc2V0",highlighted:`comments_dataset = comments_dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: x[<span class="hljs-string">"comment_length"</span>] > <span class="hljs-number">15</span>) | |
| comments_dataset`,wrap:!1}}),he=new T({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidodG1sX3VybCclMkMlMjAndGl0bGUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdib2R5JyUyQyUyMCdjb21tZW50X2xlbmd0aCclNUQlMkMlMEElMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDIwOTglMEElN0Qp",highlighted:`Dataset({ | |
| features: [<span class="hljs-string">'html_url'</span>, <span class="hljs-string">'title'</span>, <span class="hljs-string">'comments'</span>, <span class="hljs-string">'body'</span>, <span class="hljs-string">'comment_length'</span>], | |
| num_rows: <span class="hljs-number">2098</span> | |
| })`,wrap:!1}}),Te=new T({props:{code:"ZGVmJTIwY29uY2F0ZW5hdGVfdGV4dChleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0ZXh0JTIyJTNBJTIwZXhhbXBsZXMlNUIlMjJ0aXRsZSUyMiU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyQiUyMCUyMiUyMCU1Q24lMjAlMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMkIlMjBleGFtcGxlcyU1QiUyMmJvZHklMjIlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMkIlMjAlMjIlMjAlNUNuJTIwJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTJCJTIwZXhhbXBsZXMlNUIlMjJjb21tZW50cyUyMiU1RCUwQSUyMCUyMCUyMCUyMCU3RCUwQSUwQSUwQWNvbW1lbnRzX2RhdGFzZXQlMjAlM0QlMjBjb21tZW50c19kYXRhc2V0Lm1hcChjb25jYXRlbmF0ZV90ZXh0KQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">concatenate_text</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-keyword">return</span> { | |
| <span class="hljs-string">"text"</span>: examples[<span class="hljs-string">"title"</span>] | |
| + <span class="hljs-string">" \\n "</span> | |
| + examples[<span class="hljs-string">"body"</span>] | |
| + <span class="hljs-string">" \\n "</span> | |
| + examples[<span class="hljs-string">"comments"</span>] | |
| } | |
| comments_dataset = comments_dataset.<span class="hljs-built_in">map</span>(concatenate_text)`,wrap:!1}}),we=new Ke({props:{title:"Creare i text embedding",local:"creare-i-text-embedding",headingTag:"h2"}});const Wt=[ms,ps],Fe=[];function Yt(e,l){return e[0]==="pt"?0:1}N=Yt(C),B=Fe[N]=Wt[N](C),Ze=new T({props:{code:"ZGVmJTIwY2xzX3Bvb2xpbmcobW9kZWxfb3V0cHV0KSUzQSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMG1vZGVsX291dHB1dC5sYXN0X2hpZGRlbl9zdGF0ZSU1QiUzQSUyQyUyMDAlNUQ=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">cls_pooling</span>(<span class="hljs-params">model_output</span>): | |
| <span class="hljs-keyword">return</span> model_output.last_hidden_state[:, <span class="hljs-number">0</span>]`,wrap:!1}});const St=[Us,us],Ee=[];function Ft(e,l){return e[0]==="pt"?0:1}z=Ft(C),X=Ee[z]=St[z](C),Ie=new Ke({props:{title:"Usare FAISS per ricerca di similarità efficiente",local:"usare-faiss-per-ricerca-di-similarità-efficiente",headingTag:"h2"}}),Re=new T({props:{code:"ZW1iZWRkaW5nc19kYXRhc2V0LmFkZF9mYWlzc19pbmRleChjb2x1bW4lM0QlMjJlbWJlZGRpbmdzJTIyKQ==",highlighted:'embeddings_dataset.add_faiss_index(column=<span class="hljs-string">"embeddings"</span>)',wrap:!1}});const Et=[bs,ys],He=[];function Ht(e,l){return e[0]==="pt"?0:1}return x=Ht(C),v=He[x]=Et[x](C),Be=new T({props:{code:"c2NvcmVzJTJDJTIwc2FtcGxlcyUyMCUzRCUyMGVtYmVkZGluZ3NfZGF0YXNldC5nZXRfbmVhcmVzdF9leGFtcGxlcyglMEElMjAlMjAlMjAlMjAlMjJlbWJlZGRpbmdzJTIyJTJDJTIwcXVlc3Rpb25fZW1iZWRkaW5nJTJDJTIwayUzRDUlMEEp",highlighted:`scores, samples = embeddings_dataset.get_nearest_examples( | |
| <span class="hljs-string">"embeddings"</span>, question_embedding, k=<span class="hljs-number">5</span> | |
| )`,wrap:!1}}),Xe=new T({props:{code:"aW1wb3J0JTIwcGFuZGFzJTIwYXMlMjBwZCUwQSUwQXNhbXBsZXNfZGYlMjAlM0QlMjBwZC5EYXRhRnJhbWUuZnJvbV9kaWN0KHNhbXBsZXMpJTBBc2FtcGxlc19kZiU1QiUyMnNjb3JlcyUyMiU1RCUyMCUzRCUyMHNjb3JlcyUwQXNhbXBsZXNfZGYuc29ydF92YWx1ZXMoJTIyc2NvcmVzJTIyJTJDJTIwYXNjZW5kaW5nJTNERmFsc2UlMkMlMjBpbnBsYWNlJTNEVHJ1ZSk=",highlighted:`<span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd | |
| samples_df = pd.DataFrame.from_dict(samples) | |
| samples_df[<span class="hljs-string">"scores"</span>] = scores | |
| samples_df.sort_values(<span class="hljs-string">"scores"</span>, ascending=<span class="hljs-literal">False</span>, inplace=<span class="hljs-literal">True</span>)`,wrap:!1}}),ve=new T({props:{code:"Zm9yJTIwXyUyQyUyMHJvdyUyMGluJTIwc2FtcGxlc19kZi5pdGVycm93cygpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMkNPTU1FTlQlM0ElMjAlN0Jyb3cuY29tbWVudHMlN0QlMjIpJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlNDT1JFJTNBJTIwJTdCcm93LnNjb3JlcyU3RCUyMiklMEElMjAlMjAlMjAlMjBwcmludChmJTIyVElUTEUlM0ElMjAlN0Jyb3cudGl0bGUlN0QlMjIpJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlVSTCUzQSUyMCU3QnJvdy5odG1sX3VybCU3RCUyMiklMEElMjAlMjAlMjAlMjBwcmludCglMjIlM0QlMjIlMjAqJTIwNTApJTBBJTIwJTIwJTIwJTIwcHJpbnQoKQ==",highlighted:`<span class="hljs-keyword">for</span> _, row <span class="hljs-keyword">in</span> samples_df.iterrows(): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"COMMENT: <span class="hljs-subst">{row.comments}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"SCORE: <span class="hljs-subst">{row.scores}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"TITLE: <span class="hljs-subst">{row.title}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"URL: <span class="hljs-subst">{row.html_url}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"="</span> * <span class="hljs-number">50</span>) | |
| <span class="hljs-built_in">print</span>()`,wrap:!1}}),Ve=new T({props:{code:"JTIyJTIyJTIyJTBBQ09NTUVOVCUzQSUyMFJlcXVpcmluZyUyMG9ubGluZSUyMGNvbm5lY3Rpb24lMjBpcyUyMGElMjBkZWFsJTIwYnJlYWtlciUyMGluJTIwc29tZSUyMGNhc2VzJTIwdW5mb3J0dW5hdGVseSUyMHNvJTIwaXQnZCUyMGJlJTIwZ3JlYXQlMjBpZiUyMG9mZmxpbmUlMjBtb2RlJTIwaXMlMjBhZGRlZCUyMHNpbWlsYXIlMjB0byUyMGhvdyUyMCU2MHRyYW5zZm9ybWVycyU2MCUyMGxvYWRzJTIwbW9kZWxzJTIwb2ZmbGluZSUyMGZpbmUuJTBBJTBBJTQwbWFuZHViaWFuJ3MlMjBzZWNvbmQlMjBidWxsZXQlMjBwb2ludCUyMHN1Z2dlc3RzJTIwdGhhdCUyMHRoZXJlJ3MlMjBhJTIwd29ya2Fyb3VuZCUyMGFsbG93aW5nJTIweW91JTIwdG8lMjB1c2UlMjB5b3VyJTIwb2ZmbGluZSUyMChjdXN0b20lM0YpJTIwZGF0YXNldCUyMHdpdGglMjAlNjBkYXRhc2V0cyU2MC4lMjBDb3VsZCUyMHlvdSUyMHBsZWFzZSUyMGVsYWJvcmF0ZSUyMG9uJTIwaG93JTIwdGhhdCUyMHNob3VsZCUyMGxvb2slMjBsaWtlJTNGJTBBU0NPUkUlM0ElMjAyNS41MDUwNDY4NDQ0ODI0MjIlMEFUSVRMRSUzQSUyMERpc2N1c3Npb24lMjB1c2luZyUyMGRhdGFzZXRzJTIwaW4lMjBvZmZsaW5lJTIwbW9kZSUwQVVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZpc3N1ZXMlMkY4MjQlMEElM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlMEElMEFDT01NRU5UJTNBJTIwVGhlJTIwbG9jYWwlMjBkYXRhc2V0JTIwYnVpbGRlcnMlMjAoY3N2JTJDJTIwdGV4dCUyMCUyQyUyMGpzb24lMjBhbmQlMjBwYW5kYXMpJTIwYXJlJTIwbm93JTIwcGFydCUyMG9mJTIwdGhlJTIwJTYwZGF0YXNldHMlNjAlMjBwYWNrYWdlJTIwc2luY2UlMjAlMjMxNzI2JTIwJTNBKSUwQVlvdSUyMGNhbiUyMG5vdyUyMHVzZSUyMHRoZW0lMjBvZmZsaW5lJTBBJTVDJTYwJTVDJTYwJTVDJTYwcHl0aG9uJTBBZGF0YXNldHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIydGV4dCUyMiUyQyUyMGRhdGFfZmlsZXMlM0RkYXRhX2ZpbGVzKSUwQSU1QyU2MCU1QyU2MCU1QyU2MCUwQSUwQVdlJ2xsJTIwZG8lMjBhJTIwbmV3JTIwcmVsZWFzZSUyMHNvb24lMEFTQ09SRSUzQSUyMDI0LjU1NTUwOTU2NzI2MDc0MiUwQVRJVExFJTNBJTIwRGlzY3Vzc2lvbiUyMHVzaW5nJTIwZGF0YXNldHMlMjBpbiUyMG9mZmxpbmUlMjBtb2RlJTBBVVJMJTNBJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjgyNCUwQSUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUz
RCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUwQSUwQUNPTU1FTlQlM0ElMjBJJTIwb3BlbmVkJTIwYSUyMFBSJTIwdGhhdCUyMGFsbG93cyUyMHRvJTIwcmVsb2FkJTIwbW9kdWxlcyUyMHRoYXQlMjBoYXZlJTIwYWxyZWFkeSUyMGJlZW4lMjBsb2FkZWQlMjBvbmNlJTIwZXZlbiUyMGlmJTIwdGhlcmUncyUyMG5vJTIwaW50ZXJuZXQuJTBBJTBBTGV0JTIwbWUlMjBrbm93JTIwaWYlMjB5b3UlMjBrbm93JTIwb3RoZXIlMjB3YXlzJTIwdGhhdCUyMGNhbiUyMG1ha2UlMjB0aGUlMjBvZmZsaW5lJTIwbW9kZSUyMGV4cGVyaWVuY2UlMjBiZXR0ZXIuJTIwSSdkJTIwYmUlMjBoYXBweSUyMHRvJTIwYWRkJTIwdGhlbSUyMCUzQSklMEElMEFJJTIwYWxyZWFkeSUyMG5vdGUlMjB0aGUlMjAlMjJmcmVlemUlMjIlMjBtb2R1bGVzJTIwb3B0aW9uJTJDJTIwdG8lMjBwcmV2ZW50JTIwbG9jYWwlMjBtb2R1bGVzJTIwdXBkYXRlcy4lMjBJdCUyMHdvdWxkJTIwYmUlMjBhJTIwY29vbCUyMGZlYXR1cmUuJTBBJTBBLS0tLS0tLS0tLSUwQSUwQSUzRSUyMCU0MG1hbmR1YmlhbidzJTIwc2Vjb25kJTIwYnVsbGV0JTIwcG9pbnQlMjBzdWdnZXN0cyUyMHRoYXQlMjB0aGVyZSdzJTIwYSUyMHdvcmthcm91bmQlMjBhbGxvd2luZyUyMHlvdSUyMHRvJTIwdXNlJTIweW91ciUyMG9mZmxpbmUlMjAoY3VzdG9tJTNGKSUyMGRhdGFzZXQlMjB3aXRoJTIwJTYwZGF0YXNldHMlNjAuJTIwQ291bGQlMjB5b3UlMjBwbGVhc2UlMjBlbGFib3JhdGUlMjBvbiUyMGhvdyUyMHRoYXQlMjBzaG91bGQlMjBsb29rJTIwbGlrZSUzRiUwQSUwQUluZGVlZCUyMCU2MGxvYWRfZGF0YXNldCU2MCUyMGFsbG93cyUyMHRvJTIwbG9hZCUyMHJlbW90ZSUyMGRhdGFzZXQlMjBzY3JpcHQlMjAoc3F1YWQlMkMlMjBnbHVlJTJDJTIwZXRjLiklMjBidXQlMjBhbHNvJTIweW91JTIwb3duJTIwbG9jYWwlMjBvbmVzLiUwQUZvciUyMGV4YW1wbGUlMjBpZiUyMHlvdSUyMGhhdmUlMjBhJTIwZGF0YXNldCUyMHNjcmlwdCUyMGF0JTIwJTYwLiUyRm15X2RhdGFzZXQlMkZteV9kYXRhc2V0LnB5JTYwJTIwdGhlbiUyMHlvdSUyMGNhbiUyMGRvJTBBJTVDJTYwJTVDJTYwJTVDJTYwcHl0aG9uJTBBbG9hZF9kYXRhc2V0KCUyMi4lMkZteV9kYXRhc2V0JTIyKSUwQSU1QyU2MCU1QyU2MCU1QyU2MCUwQWFuZCUyMHRoZSUyMGRhdGFzZXQlMjBzY3JpcHQlMjB3aWxsJTIwZ2VuZXJhdGUlMjB5b3VyJTIwZGF0YXNldCUyMG9uY2UlMjBhbmQlMjBmb3IlMjBhbGwuJTBBJTBBLS0tLS0tLS0tLSUwQSUwQUFib3V0JTIwSSdtJTIwbG9va2luZyUyMGludG8lMjBoYXZpbmclMjAlNjBjc3YlNjAlMkMlMjAlNjBqc29uJTYwJTJDJTIwJTYwdGV4dCU2MCUyQyUyMCU2MHBhbmRhcyU2MCUyMGRhdGFzZXQlMjBidWlsZGVycyUyMGFscmVhZHklMjBpbmNsdWRlZCUy
MGluJTIwdGhlJTIwJTYwZGF0YXNldHMlNjAlMjBwYWNrYWdlJTJDJTIwc28lMjB0aGF0JTIwdGhleSUyMGFyZSUyMGF2YWlsYWJsZSUyMG9mZmxpbmUlMjBieSUyMGRlZmF1bHQlMkMlMjBhcyUyMG9wcG9zZWQlMjB0byUyMHRoZSUyMG90aGVyJTIwZGF0YXNldHMlMjB0aGF0JTIwcmVxdWlyZSUyMHRoZSUyMHNjcmlwdCUyMHRvJTIwYmUlMjBkb3dubG9hZGVkLiUwQWNmJTIwJTIzMTcyNCUwQVNDT1JFJTNBJTIwMjQuMTQ4OTY1ODM1NTcxMjklMEFUSVRMRSUzQSUyMERpc2N1c3Npb24lMjB1c2luZyUyMGRhdGFzZXRzJTIwaW4lMjBvZmZsaW5lJTIwbW9kZSUwQVVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZpc3N1ZXMlMkY4MjQlMEElM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlMEElMEFDT01NRU5UJTNBJTIwJTNFJTIwaGVyZSUyMGlzJTIwbXklMjB3YXklMjB0byUyMGxvYWQlMjBhJTIwZGF0YXNldCUyMG9mZmxpbmUlMkMlMjBidXQlMjBpdCUyMCoqcmVxdWlyZXMqKiUyMGFuJTIwb25saW5lJTIwbWFjaGluZSUwQSUzRSUwQSUzRSUyMDEuJTIwKG9ubGluZSUyMG1hY2hpbmUpJTBBJTNFJTBBJTNFJTIwJTYwJTYwJTYwJTBBJTNFJTBBJTNFJTIwaW1wb3J0JTIwZGF0YXNldHMlMEElM0UlMEElM0UlMjBkYXRhJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KC4uLiklMEElM0UlMEElM0UlMjBkYXRhLnNhdmVfdG9fZGlzayglMkZZT1VSJTJGREFUQVNFVCUyRkRJUiklMEElM0UlMEElM0UlMjAlNjAlNjAlNjAlMEElM0UlMEElM0UlMjAyLiUyMGNvcHklMjB0aGUlMjBkaXIlMjBmcm9tJTIwb25saW5lJTIwdG8lMjB0aGUlMjBvZmZsaW5lJTIwbWFjaGluZSUwQSUzRSUwQSUzRSUyMDMuJTIwKG9mZmxpbmUlMjBtYWNoaW5lKSUwQSUzRSUwQSUzRSUyMCU2MCU2MCU2MCUwQSUzRSUwQSUzRSUyMGltcG9ydCUyMGRhdGFzZXRzJTBBJTNFJTBBJTNFJTIwZGF0YSUyMCUzRCUyMGRhdGFzZXRzLmxvYWRfZnJvbV9kaXNrKCUyRlNBVkVEJTJGREFUQSUyRkRJUiklMEElM0UlMEElM0UlMjAlNjAlNjAlNjAlMEElM0UlMEElM0UlMEElM0UlMEElM0UlMjBIVEguJTBBJTBBJTBBU0NPUkUlM0ElMjAyMi44OTM5OTMzNzc2ODU1NDclMEFUSVRMRSUzQSUyMERpc2N1c3Npb24lMjB1c2luZyUyMGRhdGFzZXRzJTIwaW4lMjBvZmZsaW5lJTIwbW9kZSUwQVVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZpc3N1ZXMlMkY4MjQlMEElM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0Ql
M0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlMEElMEFDT01NRU5UJTNBJTIwaGVyZSUyMGlzJTIwbXklMjB3YXklMjB0byUyMGxvYWQlMjBhJTIwZGF0YXNldCUyMG9mZmxpbmUlMkMlMjBidXQlMjBpdCUyMCoqcmVxdWlyZXMqKiUyMGFuJTIwb25saW5lJTIwbWFjaGluZSUwQTEuJTIwKG9ubGluZSUyMG1hY2hpbmUpJTBBJTVDJTYwJTVDJTYwJTVDJTYwJTBBaW1wb3J0JTIwZGF0YXNldHMlMEFkYXRhJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KC4uLiklMEFkYXRhLnNhdmVfdG9fZGlzayglMkZZT1VSJTJGREFUQVNFVCUyRkRJUiklMEElNUMlNjAlNUMlNjAlNUMlNjAlMEEyLiUyMGNvcHklMjB0aGUlMjBkaXIlMjBmcm9tJTIwb25saW5lJTIwdG8lMjB0aGUlMjBvZmZsaW5lJTIwbWFjaGluZSUwQTMuJTIwKG9mZmxpbmUlMjBtYWNoaW5lKSUwQSU1QyU2MCU1QyU2MCU1QyU2MCUwQWltcG9ydCUyMGRhdGFzZXRzJTBBZGF0YSUyMCUzRCUyMGRhdGFzZXRzLmxvYWRfZnJvbV9kaXNrKCUyRlNBVkVEJTJGREFUQSUyRkRJUiklMEElNUMlNjAlNUMlNjAlNUMlNjAlMEElMEFIVEguJTBBU0NPUkUlM0ElMjAyMi40MDY2MzUyODQ0MjM4MjglMEFUSVRMRSUzQSUyMERpc2N1c3Npb24lMjB1c2luZyUyMGRhdGFzZXRzJTIwaW4lMjBvZmZsaW5lJTIwbW9kZSUwQVVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZpc3N1ZXMlMkY4MjQlMEElM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlMEElMjIlMjIlMjI=",highlighted:`<span class="hljs-string">""" | |
| COMMENT: Requiring online connection is a deal breaker in some cases unfortunately so it'd be great if offline mode is added similar to how \`transformers\` loads models offline fine. | |
| @mandubian's second bullet point suggests that there's a workaround allowing you to use your offline (custom?) dataset with \`datasets\`. Could you please elaborate on how that should look like? | |
| SCORE: 25.505046844482422 | |
| TITLE: Discussion using datasets in offline mode | |
| URL: https://github.com/huggingface/datasets/issues/824 | |
| ================================================== | |
| COMMENT: The local dataset builders (csv, text , json and pandas) are now part of the \`datasets\` package since #1726 :) | |
| You can now use them offline | |
| \\\`\\\`\\\`python | |
| datasets = load_dataset("text", data_files=data_files) | |
| \\\`\\\`\\\` | |
| We'll do a new release soon | |
| SCORE: 24.555509567260742 | |
| TITLE: Discussion using datasets in offline mode | |
| URL: https://github.com/huggingface/datasets/issues/824 | |
| ================================================== | |
| COMMENT: I opened a PR that allows to reload modules that have already been loaded once even if there's no internet. | |
| Let me know if you know other ways that can make the offline mode experience better. I'd be happy to add them :) | |
| I already note the "freeze" modules option, to prevent local modules updates. It would be a cool feature. | |
| ---------- | |
| > @mandubian's second bullet point suggests that there's a workaround allowing you to use your offline (custom?) dataset with \`datasets\`. Could you please elaborate on how that should look like? | |
| Indeed \`load_dataset\` allows to load remote dataset script (squad, glue, etc.) but also you own local ones. | |
| For example if you have a dataset script at \`./my_dataset/my_dataset.py\` then you can do | |
| \\\`\\\`\\\`python | |
| load_dataset("./my_dataset") | |
| \\\`\\\`\\\` | |
| and the dataset script will generate your dataset once and for all. | |
| ---------- | |
| About I'm looking into having \`csv\`, \`json\`, \`text\`, \`pandas\` dataset builders already included in the \`datasets\` package, so that they are available offline by default, as opposed to the other datasets that require the script to be downloaded. | |
| cf #1724 | |
| SCORE: 24.14896583557129 | |
| TITLE: Discussion using datasets in offline mode | |
| URL: https://github.com/huggingface/datasets/issues/824 | |
| ================================================== | |
| COMMENT: > here is my way to load a dataset offline, but it **requires** an online machine | |
| > | |
| > 1. (online machine) | |
| > | |
| > \`\`\` | |
| > | |
| > import datasets | |
| > | |
| > data = datasets.load_dataset(...) | |
| > | |
| > data.save_to_disk(/YOUR/DATASET/DIR) | |
| > | |
| > \`\`\` | |
| > | |
| > 2. copy the dir from online to the offline machine | |
| > | |
| > 3. (offline machine) | |
| > | |
| > \`\`\` | |
| > | |
| > import datasets | |
| > | |
| > data = datasets.load_from_disk(/SAVED/DATA/DIR) | |
| > | |
| > \`\`\` | |
| > | |
| > | |
| > | |
| > HTH. | |
| SCORE: 22.893993377685547 | |
| TITLE: Discussion using datasets in offline mode | |
| URL: https://github.com/huggingface/datasets/issues/824 | |
| ================================================== | |
| COMMENT: here is my way to load a dataset offline, but it **requires** an online machine | |
| 1. (online machine) | |
| \\\`\\\`\\\` | |
| import datasets | |
| data = datasets.load_dataset(...) | |
| data.save_to_disk(/YOUR/DATASET/DIR) | |
| \\\`\\\`\\\` | |
| 2. copy the dir from online to the offline machine | |
| 3. (offline machine) | |
| \\\`\\\`\\\` | |
| import datasets | |
| data = datasets.load_from_disk(/SAVED/DATA/DIR) | |
| \\\`\\\`\\\` | |
| HTH. | |
| SCORE: 22.406635284423828 | |
| TITLE: Discussion using datasets in offline mode | |
| URL: https://github.com/huggingface/datasets/issues/824 | |
| ================================================== | |
| """</span>`,wrap:!1}}),Y=new Pt({props:{$$slots:{default:[Js]},$$scope:{ctx:C}}}),Ye=new rs({props:{source:"https://github.com/huggingface/course/blob/main/chapters/it/chapter5/6.mdx"}}),{c(){a=h("meta"),p=o(),n=h("p"),w=o(),u(M.$$.fragment),m=o(),u(G.$$.fragment),J=o(),Z.c(),R=o(),k=h("p"),k.innerHTML=I,$=o(),u(i.$$.fragment),g=o(),u(S.$$.fragment),Oe=o(),F=h("p"),F.innerHTML=dt,el=o(),E=h("p"),E.textContent=pt,ll=o(),V=h("div"),V.innerHTML=mt,tl=o(),u(H.$$.fragment),sl=o(),A=h("p"),A.textContent=ut,al=o(),u(q.$$.fragment),nl=o(),D=h("p"),D.innerHTML=Ut,il=o(),u(L.$$.fragment),ol=o(),u(P.$$.fragment),rl=o(),K=h("p"),K.innerHTML=yt,Ml=o(),u(O.$$.fragment),cl=o(),u(ee.$$.fragment),dl=o(),le=h("p"),le.innerHTML=bt,pl=o(),u(te.$$.fragment),ml=o(),u(se.$$.fragment),ul=o(),ae=h("p"),ae.innerHTML=Jt,Ul=o(),u(ne.$$.fragment),yl=o(),ie=h("p"),ie.innerHTML=ht,bl=o(),u(oe.$$.fragment),Jl=o(),u(re.$$.fragment),hl=o(),Me=h("p"),Me.innerHTML=ft,fl=o(),u(ce.$$.fragment),Tl=o(),_=h("table"),_.innerHTML=Tt,jl=o(),de=h("p"),de.innerHTML=jt,wl=o(),u(pe.$$.fragment),gl=o(),u(me.$$.fragment),Ql=o(),ue=h("p"),ue.textContent=wt,Zl=o(),u(W.$$.fragment),Gl=o(),Ue=h("p"),Ue.innerHTML=gt,Cl=o(),u(ye.$$.fragment),Il=o(),be=h("p"),be.textContent=Qt,kl=o(),u(Je.$$.fragment),$l=o(),u(he.$$.fragment),Rl=o(),fe=h("p"),fe.innerHTML=Zt,_l=o(),u(Te.$$.fragment),Nl=o(),je=h("p"),je.textContent=Gt,Bl=o(),u(we.$$.fragment),zl=o(),ge=h("p"),ge.innerHTML=Ct,Xl=o(),B.c(),Ae=o(),Qe=h("p"),Qe.innerHTML=It,xl=o(),u(Ze.$$.fragment),vl=o(),Ge=h("p"),Ge.textContent=kt,Vl=o(),X.c(),qe=o(),Ce=h("p"),Ce.textContent=$t,Wl=o(),u(Ie.$$.fragment),Yl=o(),ke=h("p"),ke.innerHTML=Rt,Sl=o(),$e=h("p"),$e.innerHTML=_t,Fl=o(),u(Re.$$.fragment),El=o(),_e=h("p"),_e.innerHTML=Nt,Hl=o(),v.c(),De=o(),Ne=h("p"),Ne.textContent=Bt,Al=o(),u(Be.$$.fragment),ql=o(),ze=h("p"),ze.innerHTML=zt,Dl=o(),u(Xe.$$.fragment),Ll=o(),xe=h("p"),xe.textContent=Xt,Pl=o(),u(ve.$$.fragment),Kl=o(),u(Ve.$$.fragment),Ol=o(),We=h("p"),We.textContent=xt,et
=o(),u(Y.$$.fragment),lt=o(),u(Ye.$$.fragment),tt=o(),Le=h("p"),this.h()},l(e){const l=as("svelte-u9bgzb",document.head);a=f(l,"META",{name:!0,content:!0}),l.forEach(t),p=r(e),n=f(e,"P",{}),Lt(n).forEach(t),w=r(e),U(M.$$.fragment,e),m=r(e),U(G.$$.fragment,e),J=r(e),Z.l(e),R=r(e),k=f(e,"P",{"data-svelte-h":!0}),j(k)!=="svelte-zmfqyi"&&(k.innerHTML=I),$=r(e),U(i.$$.fragment,e),g=r(e),U(S.$$.fragment,e),Oe=r(e),F=f(e,"P",{"data-svelte-h":!0}),j(F)!=="svelte-ol4drb"&&(F.innerHTML=dt),el=r(e),E=f(e,"P",{"data-svelte-h":!0}),j(E)!=="svelte-17yhsv1"&&(E.textContent=pt),ll=r(e),V=f(e,"DIV",{class:!0,"data-svelte-h":!0}),j(V)!=="svelte-yxatr"&&(V.innerHTML=mt),tl=r(e),U(H.$$.fragment,e),sl=r(e),A=f(e,"P",{"data-svelte-h":!0}),j(A)!=="svelte-a3kzw0"&&(A.textContent=ut),al=r(e),U(q.$$.fragment,e),nl=r(e),D=f(e,"P",{"data-svelte-h":!0}),j(D)!=="svelte-162oxkz"&&(D.innerHTML=Ut),il=r(e),U(L.$$.fragment,e),ol=r(e),U(P.$$.fragment,e),rl=r(e),K=f(e,"P",{"data-svelte-h":!0}),j(K)!=="svelte-nxqhiv"&&(K.innerHTML=yt),Ml=r(e),U(O.$$.fragment,e),cl=r(e),U(ee.$$.fragment,e),dl=r(e),le=f(e,"P",{"data-svelte-h":!0}),j(le)!=="svelte-ikowdy"&&(le.innerHTML=bt),pl=r(e),U(te.$$.fragment,e),ml=r(e),U(se.$$.fragment,e),ul=r(e),ae=f(e,"P",{"data-svelte-h":!0}),j(ae)!=="svelte-1ofr137"&&(ae.innerHTML=Jt),Ul=r(e),U(ne.$$.fragment,e),yl=r(e),ie=f(e,"P",{"data-svelte-h":!0}),j(ie)!=="svelte-1ltyg57"&&(ie.innerHTML=ht),bl=r(e),U(oe.$$.fragment,e),Jl=r(e),U(re.$$.fragment,e),hl=r(e),Me=f(e,"P",{"data-svelte-h":!0}),j(Me)!=="svelte-1aw5ud4"&&(Me.innerHTML=ft),fl=r(e),U(ce.$$.fragment,e),Tl=r(e),_=f(e,"TABLE",{border:!0,class:!0,style:!0,"data-svelte-h":!0}),j(_)!=="svelte-1g5whzd"&&(_.innerHTML=Tt),jl=r(e),de=f(e,"P",{"data-svelte-h":!0}),j(de)!=="svelte-1co8std"&&(de.innerHTML=jt),wl=r(e),U(pe.$$.fragment,e),gl=r(e),U(me.$$.fragment,e),Ql=r(e),ue=f(e,"P",{"data-svelte-h":!0}),j(ue)!=="svelte-ul1oyx"&&(ue.textContent=wt),Zl=r(e),U(W.$$.fragment,e),Gl=r(e),Ue=f(e,"P",{"data-svelte-h":!0}),j(Ue)!=="svelte
-txz5a1"&&(Ue.innerHTML=gt),Cl=r(e),U(ye.$$.fragment,e),Il=r(e),be=f(e,"P",{"data-svelte-h":!0}),j(be)!=="svelte-q63izi"&&(be.textContent=Qt),kl=r(e),U(Je.$$.fragment,e),$l=r(e),U(he.$$.fragment,e),Rl=r(e),fe=f(e,"P",{"data-svelte-h":!0}),j(fe)!=="svelte-1w0qnmy"&&(fe.innerHTML=Zt),_l=r(e),U(Te.$$.fragment,e),Nl=r(e),je=f(e,"P",{"data-svelte-h":!0}),j(je)!=="svelte-1evji84"&&(je.textContent=Gt),Bl=r(e),U(we.$$.fragment,e),zl=r(e),ge=f(e,"P",{"data-svelte-h":!0}),j(ge)!=="svelte-gakban"&&(ge.innerHTML=Ct),Xl=r(e),B.l(e),Ae=r(e),Qe=f(e,"P",{"data-svelte-h":!0}),j(Qe)!=="svelte-1blfpo3"&&(Qe.innerHTML=It),xl=r(e),U(Ze.$$.fragment,e),vl=r(e),Ge=f(e,"P",{"data-svelte-h":!0}),j(Ge)!=="svelte-98guag"&&(Ge.textContent=kt),Vl=r(e),X.l(e),qe=r(e),Ce=f(e,"P",{"data-svelte-h":!0}),j(Ce)!=="svelte-coeq7k"&&(Ce.textContent=$t),Wl=r(e),U(Ie.$$.fragment,e),Yl=r(e),ke=f(e,"P",{"data-svelte-h":!0}),j(ke)!=="svelte-1qbifcu"&&(ke.innerHTML=Rt),Sl=r(e),$e=f(e,"P",{"data-svelte-h":!0}),j($e)!=="svelte-2zpjb1"&&($e.innerHTML=_t),Fl=r(e),U(Re.$$.fragment,e),El=r(e),_e=f(e,"P",{"data-svelte-h":!0}),j(_e)!=="svelte-gcr4ir"&&(_e.innerHTML=Nt),Hl=r(e),v.l(e),De=r(e),Ne=f(e,"P",{"data-svelte-h":!0}),j(Ne)!=="svelte-wmwufy"&&(Ne.textContent=Bt),Al=r(e),U(Be.$$.fragment,e),ql=r(e),ze=f(e,"P",{"data-svelte-h":!0}),j(ze)!=="svelte-m2y78c"&&(ze.innerHTML=zt),Dl=r(e),U(Xe.$$.fragment,e),Ll=r(e),xe=f(e,"P",{"data-svelte-h":!0}),j(xe)!=="svelte-1x83w58"&&(xe.textContent=Xt),Pl=r(e),U(ve.$$.fragment,e),Kl=r(e),U(Ve.$$.fragment,e),Ol=r(e),We=f(e,"P",{"data-svelte-h":!0}),j(We)!=="svelte-19hbgpn"&&(We.textContent=xt),et=r(e),U(Y.$$.fragment,e),lt=r(e),U(Ye.$$.fragment,e),tt=r(e),Le=f(e,"P",{}),Lt(Le).forEach(t),this.h()},h(){Pe(a,"name","hf:doc:metadata"),Pe(a,"content",fs),Pe(V,"class","flex 
justify-center"),Pe(_,"border","1"),Pe(_,"class","dataframe"),ct(_,"table-layout","fixed"),ct(_,"word-wrap","break-word"),ct(_,"width","100%")},m(e,l){ns(document.head,a),s(e,p,l),s(e,n,l),s(e,w,l),y(M,e,l),s(e,m,l),y(G,e,l),s(e,J,l),Se[Q].m(e,l),s(e,R,l),s(e,k,l),s(e,$,l),y(i,e,l),s(e,g,l),y(S,e,l),s(e,Oe,l),s(e,F,l),s(e,el,l),s(e,E,l),s(e,ll,l),s(e,V,l),s(e,tl,l),y(H,e,l),s(e,sl,l),s(e,A,l),s(e,al,l),y(q,e,l),s(e,nl,l),s(e,D,l),s(e,il,l),y(L,e,l),s(e,ol,l),y(P,e,l),s(e,rl,l),s(e,K,l),s(e,Ml,l),y(O,e,l),s(e,cl,l),y(ee,e,l),s(e,dl,l),s(e,le,l),s(e,pl,l),y(te,e,l),s(e,ml,l),y(se,e,l),s(e,ul,l),s(e,ae,l),s(e,Ul,l),y(ne,e,l),s(e,yl,l),s(e,ie,l),s(e,bl,l),y(oe,e,l),s(e,Jl,l),y(re,e,l),s(e,hl,l),s(e,Me,l),s(e,fl,l),y(ce,e,l),s(e,Tl,l),s(e,_,l),s(e,jl,l),s(e,de,l),s(e,wl,l),y(pe,e,l),s(e,gl,l),y(me,e,l),s(e,Ql,l),s(e,ue,l),s(e,Zl,l),y(W,e,l),s(e,Gl,l),s(e,Ue,l),s(e,Cl,l),y(ye,e,l),s(e,Il,l),s(e,be,l),s(e,kl,l),y(Je,e,l),s(e,$l,l),y(he,e,l),s(e,Rl,l),s(e,fe,l),s(e,_l,l),y(Te,e,l),s(e,Nl,l),s(e,je,l),s(e,Bl,l),y(we,e,l),s(e,zl,l),s(e,ge,l),s(e,Xl,l),Fe[N].m(e,l),s(e,Ae,l),s(e,Qe,l),s(e,xl,l),y(Ze,e,l),s(e,vl,l),s(e,Ge,l),s(e,Vl,l),Ee[z].m(e,l),s(e,qe,l),s(e,Ce,l),s(e,Wl,l),y(Ie,e,l),s(e,Yl,l),s(e,ke,l),s(e,Sl,l),s(e,$e,l),s(e,Fl,l),y(Re,e,l),s(e,El,l),s(e,_e,l),s(e,Hl,l),He[x].m(e,l),s(e,De,l),s(e,Ne,l),s(e,Al,l),y(Be,e,l),s(e,ql,l),s(e,ze,l),s(e,Dl,l),y(Xe,e,l),s(e,Ll,l),s(e,xe,l),s(e,Pl,l),y(ve,e,l),s(e,Kl,l),y(Ve,e,l),s(e,Ol,l),s(e,We,l),s(e,et,l),y(Y,e,l),s(e,lt,l),y(Ye,e,l),s(e,tt,l),s(e,Le,l),st=!0},p(e,[l]){const At={};l&1&&(At.fw=e[0]),M.$set(At);let it=Q;Q=Vt(e),Q!==it&&(nt(),c(Se[it],1,1,()=>{Se[it]=null}),at(),Z=Se[Q],Z||(Z=Se[Q]=vt[Q](e),Z.c()),d(Z,1),Z.m(R.parentNode,R));const qt={};l&2&&(qt.$$scope={dirty:l,ctx:e}),W.$set(qt);let ot=N;N=Yt(e),N!==ot&&(nt(),c(Fe[ot],1,1,()=>{Fe[ot]=null}),at(),B=Fe[N],B||(B=Fe[N]=Wt[N](e),B.c()),d(B,1),B.m(Ae.parentNode,Ae));let 
rt=z;z=Ft(e),z!==rt&&(nt(),c(Ee[rt],1,1,()=>{Ee[rt]=null}),at(),X=Ee[z],X||(X=Ee[z]=St[z](e),X.c()),d(X,1),X.m(qe.parentNode,qe));let Mt=x;x=Ht(e),x!==Mt&&(nt(),c(He[Mt],1,1,()=>{He[Mt]=null}),at(),v=He[x],v||(v=He[x]=Et[x](e),v.c()),d(v,1),v.m(De.parentNode,De));const Dt={};l&2&&(Dt.$$scope={dirty:l,ctx:e}),Y.$set(Dt)},i(e){st||(d(M.$$.fragment,e),d(G.$$.fragment,e),d(Z),d(i.$$.fragment,e),d(S.$$.fragment,e),d(H.$$.fragment,e),d(q.$$.fragment,e),d(L.$$.fragment,e),d(P.$$.fragment,e),d(O.$$.fragment,e),d(ee.$$.fragment,e),d(te.$$.fragment,e),d(se.$$.fragment,e),d(ne.$$.fragment,e),d(oe.$$.fragment,e),d(re.$$.fragment,e),d(ce.$$.fragment,e),d(pe.$$.fragment,e),d(me.$$.fragment,e),d(W.$$.fragment,e),d(ye.$$.fragment,e),d(Je.$$.fragment,e),d(he.$$.fragment,e),d(Te.$$.fragment,e),d(we.$$.fragment,e),d(B),d(Ze.$$.fragment,e),d(X),d(Ie.$$.fragment,e),d(Re.$$.fragment,e),d(v),d(Be.$$.fragment,e),d(Xe.$$.fragment,e),d(ve.$$.fragment,e),d(Ve.$$.fragment,e),d(Y.$$.fragment,e),d(Ye.$$.fragment,e),st=!0)},o(e){c(M.$$.fragment,e),c(G.$$.fragment,e),c(Z),c(i.$$.fragment,e),c(S.$$.fragment,e),c(H.$$.fragment,e),c(q.$$.fragment,e),c(L.$$.fragment,e),c(P.$$.fragment,e),c(O.$$.fragment,e),c(ee.$$.fragment,e),c(te.$$.fragment,e),c(se.$$.fragment,e),c(ne.$$.fragment,e),c(oe.$$.fragment,e),c(re.$$.fragment,e),c(ce.$$.fragment,e),c(pe.$$.fragment,e),c(me.$$.fragment,e),c(W.$$.fragment,e),c(ye.$$.fragment,e),c(Je.$$.fragment,e),c(he.$$.fragment,e),c(Te.$$.fragment,e),c(we.$$.fragment,e),c(B),c(Ze.$$.fragment,e),c(X),c(Ie.$$.fragment,e),c(Re.$$.fragment,e),c(v),c(Be.$$.fragment,e),c(Xe.$$.fragment,e),c(ve.$$.fragment,e),c(Ve.$$.fragment,e),c(Y.$$.fragment,e),c(Ye.$$.fragment,e),st=!1},d(e){e&&(t(p),t(n),t(w),t(m),t(J),t(R),t(k),t($),t(g),t(Oe),t(F),t(el),t(E),t(ll),t(V),t(tl),t(sl),t(A),t(al),t(nl),t(D),t(il),t(ol),t(rl),t(K),t(Ml),t(cl),t(dl),t(le),t(pl),t(ml),t(ul),t(ae),t(Ul),t(yl),t(ie),t(bl),t(Jl),t(hl),t(Me),t(fl),t(Tl),t(_),t(jl),t(de),t(wl),t(gl),t(Ql),t(ue),t(Zl),t(Gl),t(Ue),t(Cl)
,t(Il),t(be),t(kl),t($l),t(Rl),t(fe),t(_l),t(Nl),t(je),t(Bl),t(zl),t(ge),t(Xl),t(Ae),t(Qe),t(xl),t(vl),t(Ge),t(Vl),t(qe),t(Ce),t(Wl),t(Yl),t(ke),t(Sl),t($e),t(Fl),t(El),t(_e),t(Hl),t(De),t(Ne),t(Al),t(ql),t(ze),t(Dl),t(Ll),t(xe),t(Pl),t(Kl),t(Ol),t(We),t(et),t(lt),t(tt),t(Le)),t(a),b(M,e),b(G,e),Se[Q].d(e),b(i,e),b(S,e),b(H,e),b(q,e),b(L,e),b(P,e),b(O,e),b(ee,e),b(te,e),b(se,e),b(ne,e),b(oe,e),b(re,e),b(ce,e),b(pe,e),b(me,e),b(W,e),b(ye,e),b(Je,e),b(he,e),b(Te,e),b(we,e),Fe[N].d(e),b(Ze,e),Ee[z].d(e),b(Ie,e),b(Re,e),He[x].d(e),b(Be,e),b(Xe,e),b(ve,e),b(Ve,e),b(Y,e),b(Ye,e)}}}const fs='{"title":"Ricerca semantica con FAISS","local":"ricerca-semantica-con-faiss","sections":[{"title":"Usare gli embedding per la ricerca semantica","local":"usare-gli-embedding-per-la-ricerca-semantica","sections":[],"depth":2},{"title":"Caricare e preparare il dataset","local":"caricare-e-preparare-il-dataset","sections":[],"depth":2},{"title":"Creare i text embedding","local":"creare-i-text-embedding","sections":[],"depth":2},{"title":"Usare FAISS per ricerca di similarità efficiente","local":"usare-faiss-per-ricerca-di-similarità-efficiente","sections":[],"depth":2}],"depth":1}';function Ts(C,a,p){let n="pt";return ls(()=>{const w=new URLSearchParams(window.location.search);p(0,n=w.get("fw")||"pt")}),[n]}class ks extends ts{constructor(a){super(),ss(this,a,Ts,hs,es,{})}}export{ks as component}; | |
Xet Storage Details
- Size: 67.2 kB
- Xet hash: 53ba1a76f5b6faedae267ebf5b16ce48189f5f0f08bf0b7669085643d66978ff
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.