Buckets:
| import{s as me,o as pe,n as oe}from"../chunks/scheduler.9991993c.js";import{S as ue,i as he,g as T,s as c,r as h,A as be,h as M,f as s,c as i,j as K,u as b,x as B,k as S,y,a as d,v as f,d as g,t as _,w as k}from"../chunks/index.7fc9a5e7.js";import{D as de}from"../chunks/Docstring.0d7e3ebb.js";import{C as se}from"../chunks/CodeBlock.e11cba92.js";import{E as te}from"../chunks/ExampleCodeBlock.46b9776a.js";import{H as ae,E as fe}from"../chunks/EditOnGithub.84ab7f0e.js";function ge(J){let t,r;return t=new se({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9tZXRyaWMlMEElMEFyb3VnZV9tZXRyaWMlMjAlM0QlMjBsb2FkX21ldHJpYyglMjJyb3VnZSUyMiklMEElMEElMEFkZWYlMjByb3VnZV9mbihwcmVkaWN0aW9ucyUyQyUyMGxhYmVscyklM0ElMEElMjAlMjAlMjAlMjBkZWNvZGVkX3ByZWRpY3Rpb25zJTIwJTNEJTIwdG9rZW5pemVyLmJhdGNoX2RlY29kZShwcmVkaWN0aW9ucyUyQyUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlKSUwQSUyMCUyMCUyMCUyMGRlY29kZWRfbGFiZWxzJTIwJTNEJTIwdG9rZW5pemVyLmJhdGNoX2RlY29kZShsYWJlbHMlMkMlMjBza2lwX3NwZWNpYWxfdG9rZW5zJTNEVHJ1ZSklMEElMjAlMjAlMjAlMjByZXN1bHQlMjAlM0QlMjByb3VnZV9tZXRyaWMuY29tcHV0ZShwcmVkaWN0aW9ucyUzRGRlY29kZWRfcHJlZGljdGlvbnMlMkMlMjByZWZlcmVuY2VzJTNEZGVjb2RlZF9sYWJlbHMpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTdCa2V5JTNBJTIwdmFsdWUubWlkLmZtZWFzdXJlJTIwKiUyMDEwMCUyMGZvciUyMGtleSUyQyUyMHZhbHVlJTIwaW4lMjByZXN1bHQuaXRlbXMoKSU3RA==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_metric | |
| rouge_metric = load_metric(<span class="hljs-string">"rouge"</span>) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">rouge_fn</span>(<span class="hljs-params">predictions, labels</span>): | |
| decoded_predictions = tokenizer.batch_decode(predictions, skip_special_tokens=<span class="hljs-literal">True</span>) | |
| decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=<span class="hljs-literal">True</span>) | |
| result = rouge_metric.compute(predictions=decoded_predictions, references=decoded_labels) | |
| <span class="hljs-keyword">return</span> {key: value.mid.fmeasure * <span class="hljs-number">100</span> <span class="hljs-keyword">for</span> key, value <span class="hljs-keyword">in</span> result.items()}`,wrap:!1}}),{c(){h(t.$$.fragment)},l(a){b(t.$$.fragment,a)},m(a,l){f(t,a,l),r=!0},p:oe,i(a){r||(g(t.$$.fragment,a),r=!0)},o(a){_(t.$$.fragment,a),r=!1},d(a){k(t,a)}}}function _e(J){let t,r="The above function will return a dict containing values which will be logged like any other Keras metric:",a,l,u;return l=new se({props:{code:"JTdCJ3JvdWdlMSclM0ElMjAzNy40MTk5JTJDJTIwJ3JvdWdlMiclM0ElMjAxMy45NzY4JTJDJTIwJ3JvdWdlTCclM0ElMjAzNC4zNjElMkMlMjAncm91Z2VMc3VtJyUzQSUyMDM1LjA3ODE=",highlighted:'{'rouge1': <span class="hljs-number">37.4199</span>, 'rouge2': <span class="hljs-number">13.9768</span>, 'rougeL': <span class="hljs-number">34.361</span>, 'rougeLsum': <span class="hljs-number">35.0781</span>',wrap:!1}}),{c(){t=T("p"),t.textContent=r,a=c(),h(l.$$.fragment)},l(n){t=M(n,"P",{"data-svelte-h":!0}),B(t)!=="svelte-zfl4ie"&&(t.textContent=r),a=i(n),b(l.$$.fragment,n)},m(n,w){d(n,t,w),d(n,a,w),f(l,n,w),u=!0},p:oe,i(n){u||(g(l.$$.fragment,n),u=!0)},o(n){_(l.$$.fragment,n),u=!1},d(n){n&&(s(t),s(a)),k(l,n)}}}function ke(J){let t,r;return t=new se({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycy5rZXJhc19jYWxsYmFja3MlMjBpbXBvcnQlMjBQdXNoVG9IdWJDYWxsYmFjayUwQSUwQXB1c2hfdG9faHViX2NhbGxiYWNrJTIwJTNEJTIwUHVzaFRvSHViQ2FsbGJhY2soJTBBJTIwJTIwJTIwJTIwb3V0cHV0X2RpciUzRCUyMi4lMkZtb2RlbF9zYXZlJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwaHViX21vZGVsX2lkJTNEJTIyZ3B0NS03eGxhcmdlJTIyJTJDJTBBKSUwQSUwQW1vZGVsLmZpdCh0cmFpbl9kYXRhc2V0JTJDJTIwY2FsbGJhY2tzJTNEJTVCcHVzaF90b19odWJfY2FsbGJhY2slNUQp",highlighted:`<span class="hljs-keyword">from</span> transformers.keras_callbacks <span class="hljs-keyword">import</span> PushToHubCallback | |
| push_to_hub_callback = PushToHubCallback( | |
| output_dir=<span class="hljs-string">"./model_save"</span>, | |
| tokenizer=tokenizer, | |
| hub_model_id=<span class="hljs-string">"gpt5-7xlarge"</span>, | |
| ) | |
| model.fit(train_dataset, callbacks=[push_to_hub_callback])`,wrap:!1}}),{c(){h(t.$$.fragment)},l(a){b(t.$$.fragment,a)},m(a,l){f(t,a,l),r=!0},p:oe,i(a){r||(g(t.$$.fragment,a),r=!0)},o(a){_(t.$$.fragment,a),r=!1},d(a){k(t,a)}}}function we(J){let t,r,a,l,u,n,w,ne="在Keras中训练Transformers模型时,有一些库特定的callbacks函数可用于自动执行常见任务:",G,W,P,m,Z,D,V,re=`Callback to compute metrics at the end of every epoch. Unlike normal Keras metrics, these do not need to be | |
| compilable by TF. It is particularly useful for common NLP metrics like BLEU and ROUGE that require string | |
| operations or generation loops that cannot be compiled. Predictions (or generations) will be computed on the | |
| <code>eval_dataset</code> before being passed to the <code>metric_fn</code> in <code>np.ndarray</code> format. The <code>metric_fn</code> should compute | |
| metrics and return a dict mapping metric names to metric values.`,A,E,le=`We provide an example of a suitable metric_fn that computes ROUGE scores for a summarization model below. Note that | |
| this example skips some post-processing for readability and simplicity, and should probably not be used as-is!`,L,x,Q,C,X,I,F,$,z,O,N,ce=`Callback that will save and push the model to the Hub regularly. By default, it pushes once per epoch, but this can | |
| be changed with the <code>save_strategy</code> argument. Pushed models can be accessed like any other model on the hub, such | |
| as with the <code>from_pretrained</code> method.`,ee,j,Y,H,R,U,q;return u=new ae({props:{title:"Keras callbacks",local:"keras-callbacks",headingTag:"h1"}}),W=new ae({props:{title:"KerasMetricCallback",local:"transformers.KerasMetricCallback",headingTag:"h2"}}),Z=new de({props:{name:"class transformers.KerasMetricCallback",anchor:"transformers.KerasMetricCallback",parameters:[{name:"metric_fn",val:": Callable"},{name:"eval_dataset",val:": Union"},{name:"output_cols",val:": Optional = None"},{name:"label_cols",val:": Optional = None"},{name:"batch_size",val:": Optional = None"},{name:"predict_with_generate",val:": bool = False"},{name:"use_xla_generation",val:": bool = False"},{name:"generate_kwargs",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.KerasMetricCallback.metric_fn",description:`<strong>metric_fn</strong> (<code>Callable</code>) — | |
| Metric function provided by the user. It will be called with two arguments - <code>predictions</code> and <code>labels</code>. | |
| These contain the model’s outputs and matching labels from the dataset. It should return a dict mapping | |
| metric names to numerical values.`,name:"metric_fn"},{anchor:"transformers.KerasMetricCallback.eval_dataset",description:`<strong>eval_dataset</strong> (<code>tf.data.Dataset</code> or <code>dict</code> or <code>tuple</code> or <code>np.ndarray</code> or <code>tf.Tensor</code>) — | |
| Validation data to be used to generate predictions for the <code>metric_fn</code>.`,name:"eval_dataset"},{anchor:"transformers.KerasMetricCallback.output_cols",description:"<strong>output_cols</strong> (`List[str], <em>optional</em>) —\nA list of columns to be retained from the model output as the predictions. Defaults to all.",name:"output_cols"},{anchor:"transformers.KerasMetricCallback.label_cols",description:`<strong>label_cols</strong> (’<code>List[str]</code>, <em>optional</em>’) — | |
| A list of columns to be retained from the input dataset as the labels. Will be autodetected if this is not | |
| supplied.`,name:"label_cols"},{anchor:"transformers.KerasMetricCallback.batch_size",description:`<strong>batch_size</strong> (<code>int</code>, <em>optional</em>) — | |
| Batch size. Only used when the data is not a pre-batched <code>tf.data.Dataset</code>.`,name:"batch_size"},{anchor:"transformers.KerasMetricCallback.predict_with_generate",description:`<strong>predict_with_generate</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether we should use <code>model.generate()</code> to get outputs for the model.`,name:"predict_with_generate"},{anchor:"transformers.KerasMetricCallback.use_xla_generation",description:`<strong>use_xla_generation</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| If we’re generating, whether to compile model generation with XLA. This can massively increase the speed of | |
| generation (up to 100X speedup) but will require a new XLA compilation for each input shape. When using XLA | |
| generation, it’s a good idea to pad your inputs to the same size, or to use the <code>pad_to_multiple_of</code> | |
| argument in your <code>tokenizer</code> or <code>DataCollator</code>, which will reduce the number of unique input shapes and | |
| save a lot of compilation time. This option has no effect is <code>predict_with_generate</code> is <code>False</code>.`,name:"use_xla_generation"},{anchor:"transformers.KerasMetricCallback.generate_kwargs",description:`<strong>generate_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| Keyword arguments to pass to <code>model.generate()</code> when generating. Has no effect if <code>predict_with_generate</code> | |
| is <code>False</code>.`,name:"generate_kwargs"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/keras_callbacks.py#L20"}}),x=new te({props:{anchor:"transformers.KerasMetricCallback.example",$$slots:{default:[ge]},$$scope:{ctx:J}}}),C=new te({props:{anchor:"transformers.KerasMetricCallback.example-2",$$slots:{default:[_e]},$$scope:{ctx:J}}}),I=new ae({props:{title:"PushToHubCallback",local:"transformers.PushToHubCallback",headingTag:"h2"}}),z=new de({props:{name:"class transformers.PushToHubCallback",anchor:"transformers.PushToHubCallback",parameters:[{name:"output_dir",val:": Union"},{name:"save_strategy",val:": Union = 'epoch'"},{name:"save_steps",val:": Optional = None"},{name:"tokenizer",val:": Optional = None"},{name:"hub_model_id",val:": Optional = None"},{name:"hub_token",val:": Optional = None"},{name:"checkpoint",val:": bool = False"},{name:"**model_card_args",val:""}],parametersDescription:[{anchor:"transformers.PushToHubCallback.output_dir",description:`<strong>output_dir</strong> (<code>str</code>) — | |
| The output directory where the model predictions and checkpoints will be written and synced with the | |
| repository on the Hub.`,name:"output_dir"},{anchor:"transformers.PushToHubCallback.save_strategy",description:`<strong>save_strategy</strong> (<code>str</code> or <a href="/docs/transformers/main/zh/internal/trainer_utils#transformers.IntervalStrategy">IntervalStrategy</a>, <em>optional</em>, defaults to <code>"epoch"</code>) — | |
| The checkpoint save strategy to adopt during training. Possible values are:</p> | |
| <ul> | |
| <li><code>"no"</code>: Save is done at the end of training.</li> | |
| <li><code>"epoch"</code>: Save is done at the end of each epoch.</li> | |
| <li><code>"steps"</code>: Save is done every <code>save_steps</code></li> | |
| </ul>`,name:"save_strategy"},{anchor:"transformers.PushToHubCallback.save_steps",description:`<strong>save_steps</strong> (<code>int</code>, <em>optional</em>) — | |
| The number of steps between saves when using the “steps” <code>save_strategy</code>.`,name:"save_steps"},{anchor:"transformers.PushToHubCallback.tokenizer",description:`<strong>tokenizer</strong> (<code>PreTrainedTokenizerBase</code>, <em>optional</em>) — | |
| The tokenizer used by the model. If supplied, will be uploaded to the repo alongside the weights.`,name:"tokenizer"},{anchor:"transformers.PushToHubCallback.hub_model_id",description:`<strong>hub_model_id</strong> (<code>str</code>, <em>optional</em>) — | |
| The name of the repository to keep in sync with the local <code>output_dir</code>. It can be a simple model ID in | |
| which case the model will be pushed in your namespace. Otherwise it should be the whole repository name, | |
| for instance <code>"user_name/model"</code>, which allows you to push to an organization you are a member of with | |
| <code>"organization_name/model"</code>.</p> | |
| <p>Will default to the name of <code>output_dir</code>.`,name:"hub_model_id"},{anchor:"transformers.PushToHubCallback.hub_token",description:`<strong>hub_token</strong> (<code>str</code>, <em>optional</em>) — | |
| The token to use to push the model to the Hub. Will default to the token in the cache folder obtained with | |
| <code>huggingface-cli login</code>.`,name:"hub_token"},{anchor:"transformers.PushToHubCallback.checkpoint",description:`<strong>checkpoint</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to save full training checkpoints (including epoch and optimizer state) to allow training to be | |
| resumed. Only usable when <code>save_strategy</code> is <code>"epoch"</code>.`,name:"checkpoint"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/keras_callbacks.py#L268"}}),j=new te({props:{anchor:"transformers.PushToHubCallback.example",$$slots:{default:[ke]},$$scope:{ctx:J}}}),H=new fe({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/zh/main_classes/keras_callbacks.md"}}),{c(){t=T("meta"),r=c(),a=T("p"),l=c(),h(u.$$.fragment),n=c(),w=T("p"),w.textContent=ne,G=c(),h(W.$$.fragment),P=c(),m=T("div"),h(Z.$$.fragment),D=c(),V=T("p"),V.innerHTML=re,A=c(),E=T("p"),E.textContent=le,L=c(),h(x.$$.fragment),Q=c(),h(C.$$.fragment),X=c(),h(I.$$.fragment),F=c(),$=T("div"),h(z.$$.fragment),O=c(),N=T("p"),N.innerHTML=ce,ee=c(),h(j.$$.fragment),Y=c(),h(H.$$.fragment),R=c(),U=T("p"),this.h()},l(e){const o=be("svelte-u9bgzb",document.head);t=M(o,"META",{name:!0,content:!0}),o.forEach(s),r=i(e),a=M(e,"P",{}),K(a).forEach(s),l=i(e),b(u.$$.fragment,e),n=i(e),w=M(e,"P",{"data-svelte-h":!0}),B(w)!=="svelte-1embeqv"&&(w.textContent=ne),G=i(e),b(W.$$.fragment,e),P=i(e),m=M(e,"DIV",{class:!0});var p=K(m);b(Z.$$.fragment,p),D=i(p),V=M(p,"P",{"data-svelte-h":!0}),B(V)!=="svelte-d9jxhh"&&(V.innerHTML=re),A=i(p),E=M(p,"P",{"data-svelte-h":!0}),B(E)!=="svelte-hu0773"&&(E.textContent=le),L=i(p),b(x.$$.fragment,p),Q=i(p),b(C.$$.fragment,p),p.forEach(s),X=i(e),b(I.$$.fragment,e),F=i(e),$=M(e,"DIV",{class:!0});var v=K($);b(z.$$.fragment,v),O=i(v),N=M(v,"P",{"data-svelte-h":!0}),B(N)!=="svelte-1o3veql"&&(N.innerHTML=ce),ee=i(v),b(j.$$.fragment,v),v.forEach(s),Y=i(e),b(H.$$.fragment,e),R=i(e),U=M(e,"P",{}),K(U).forEach(s),this.h()},h(){S(t,"name","hf:doc:metadata"),S(t,"content",$e),S(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),S($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){y(document.head,t),d(e,r,o),d(e,a,o),d(e,l,o),f(u,e,o),d(e,n,o),d(e,w,o),d(e,G,o),f(W,e,o),d(e,P,o),d(e,m,o),f(Z,m,null),y(m,D),y(m,V),y(m,A),y(m,E),y(m,L),f(x,m,null),y(m,Q),f(C,m,null),d(e,X,o),f(I,e,o),d(e,F,o),d(e,$,o),f(z,$,null),y($,O),y($,N),y($,ee),f(j,$,null),d(e,Y,o),f(H,e,o),d(e,R,o),d(e,U,o),q=!0},p(e,[o]){const p={};o&2&&(p.$$scope={dirty:o,ctx:e}),x.$set(p);const v={};o&2&&(v.$$scope={dirty:o,ctx:e}),C.$set(v);const ie={};o&2&&(ie.$$scope={dirty:o,ctx:e}),j.$set(ie)},i(e){q||(g(u.$$.fragment,e),g(W.$$.fragment,e),g(Z.$$.fragment,e),g(x.$$.fragment,e),g(C.$$.fragment,e),g(I.$$.fragment,e),g(z.$$.fragment,e),g(j.$$.fragment,e),g(H.$$.fragment,e),q=!0)},o(e){_(u.$$.fragment,e),_(W.$$.fragment,e),_(Z.$$.fragment,e),_(x.$$.fragment,e),_(C.$$.fragment,e),_(I.$$.fragment,e),_(z.$$.fragment,e),_(j.$$.fragment,e),_(H.$$.fragment,e),q=!1},d(e){e&&(s(r),s(a),s(l),s(n),s(w),s(G),s(P),s(m),s(X),s(F),s($),s(Y),s(R),s(U)),s(t),k(u,e),k(W,e),k(Z),k(x),k(C),k(I,e),k(z),k(j),k(H,e)}}}const $e='{"title":"Keras callbacks","local":"keras-callbacks","sections":[{"title":"KerasMetricCallback","local":"transformers.KerasMetricCallback","sections":[],"depth":2},{"title":"PushToHubCallback","local":"transformers.PushToHubCallback","sections":[],"depth":2}],"depth":1}';function ye(J){return pe(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class je extends ue{constructor(t){super(),he(this,t,ye,we,me,{})}}export{je as component}; | |
Xet Storage Details
- Size:
- 16.3 kB
- Xet hash:
- d828744b78551b1daba1f9f7b69a50d048ac54df7db905a650a4434e29cd4020
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.