Buckets:

hf-doc-build
/

doc

hf-doc-build/doc / optimum-neuron /v0.2.0.dev2 /en /_app /immutable /nodes /28.cd8a21b2.js

rtrm's picture

about 2 months ago

28.1 kB

	import{s as Le,n as Ve,o as Ye}from"../chunks/scheduler.a2b4ca8e.js";import{S as Pe,i as qe,g as i,s as a,r as p,A as De,h as r,f as l,c as s,j as Ne,u as m,x as o,k as Ee,y as Ke,a as n,v as M,d as c,t as u,w}from"../chunks/index.d2f673cc.js";import{C as T}from"../chunks/CodeBlock.6666c128.js";import{H as ct}from"../chunks/index.35ef470e.js";function Oe(Me){let d,ut,mt,wt,h,Tt,J,ce='<em>This tutorial is available in two different formats, as <a href="https://huggingface.co/docs/optimum-neuron/training_tutorials/fine_tune_bert" rel="nofollow">web page</a> and <a href="https://github.com/huggingface/optimum-neuron/blob/main/notebooks/text-classification/fine_tune_bert.ipynb" rel="nofollow">notebook version</a></em>.',dt,y,ue='This guide will help you to get started with <a href="https://aws.amazon.com/machine-learning/trainium/?nc1=h_ls" rel="nofollow">AWS Trainium</a> and Hugging Face Transformers. It will cover how to set up a Trainium instance on AWS, load & fine-tune a transformers model for text-classification.',ht,f,we="You will learn how to:",Jt,U,Te="<li>Setup AWS environment</li> <li>Load and process the dataset</li> <li>Fine-tune BERT using Hugging Face Transformers and Optimum Neuron</li>",yt,j,de='Before we can start, make sure you have a <a href="https://huggingface.co/join" rel="nofollow">Hugging Face Account</a> to save artifacts and experiments.',ft,b,Ut,g,he='<a href="https://aws.amazon.com/de/ec2/instance-types/trn1/" rel="nofollow">AWS Trainium (Trn1)</a> is a purpose-built EC2 for deep learning (DL) training workloads. Trainium is the successor of <a href="https://aws.amazon.com/ec2/instance-types/inf1/?nc1=h_ls" rel="nofollow">AWS Inferentia</a> focused on high-performance training workloads claiming up to 50% cost-to-train savings over comparable GPU-based instances.',jt,I,Je="Trainium has been optimized for training natural language processing, computer vision, and recommender models used. The accelerator supports a wide range of data types, including FP32, TF32, BF16, FP16, UINT8, and configurable FP8.",bt,C,ye='The biggest Trainium instance, the <code>trn1.32xlarge</code> comes with over 500GB of memory, making it easy to fine-tune ~10B parameter models on a single instance. Below you will find an overview of the available instance types. More details <a href="https://aws.amazon.com/en/ec2/instance-types/trn1/#Product_details" rel="nofollow">here</a>:',gt,v,fe="<thead><tr><th>instance size</th> <th>accelerators</th> <th>accelerator memory</th> <th>vCPU</th> <th>CPU Memory</th> <th>price per hour</th></tr></thead> <tbody><tr><td>trn1.2xlarge</td> <td>1</td> <td>32</td> <td>8</td> <td>32</td> <td>$1.34</td></tr> <tr><td>trn1.32xlarge</td> <td>16</td> <td>512</td> <td>128</td> <td>512</td> <td>$21.50</td></tr> <tr><td>trn1n.32xlarge (2x bandwidth)</td> <td>16</td> <td>512</td> <td>128</td> <td>512</td> <td>$24.78</td></tr></tbody>",It,Ct,vt,B,Ue="Now we know what Trainium offers, let’s get started. 🚀",Bt,W,je="<em>Note: This tutorial was created on a trn1.2xlarge AWS EC2 Instance.</em>",Wt,Z,Zt,_,be='In this tutorial, we will use the <code>trn1.2xlarge</code> instance on AWS with 1 Accelerator, including two Neuron Cores and the <a href="https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2" rel="nofollow">Hugging Face Neuron Deep Learning AMI</a>.',_t,A,ge="Once the instance is up and running, we can ssh into it. But instead of developing inside a terminal we want to use a <code>Jupyter</code> environment, which we can use for preparing our dataset and launching the training. For this, we need to add a port for forwarding in the <code>ssh</code> command, which will tunnel our localhost traffic to the Trainium instance.",At,$,$t,k,Ie="We need to make sure we have the <code>training</code> extra installed, to get all the necessary dependencies:",kt,x,xt,R,Ce="We can now start our <strong><code>jupyter</code></strong> server.",Rt,H,Ht,Q,ve="You should see a familiar <strong><code>jupyter</code></strong> output with a URL to the notebook.",Qt,X,Be="<strong><code>http://localhost:8080/?token=8c1739aff1755bd7958c4cfccc8d08cb5da5234f61f129a9</code></strong>",Xt,G,We="We can click on it, and a <strong><code>jupyter</code></strong> environment opens in our local browser.",Gt,F,Ze='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/optimum/neuron/tutorial-fine-tune-bert-jupyter.png" alt="jupyter.webp"/>',Ft,z,_e="We are going to use the Jupyter environment only for preparing the dataset and then <code>torchrun</code> for launching our training script on both neuron cores for distributed training. Lets create a new notebook and get started.",zt,S,St,N,Ae='We are training a Text Classification model on the <a href="https://huggingface.co/datasets/dair-ai/emotion" rel="nofollow">emotion</a> dataset to keep the example straightforward. The <code>emotion</code> is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.',Nt,E,$e='We will use the <code>load_dataset()</code> method from the <a href="https://huggingface.co/docs/datasets/index" rel="nofollow">🤗 Datasets</a> library to load the <code>emotion</code>.',Et,L,Lt,V,ke="Let’s check out an example of the dataset.",Vt,Y,Yt,P,xe='We must convert our “Natural Language” to token IDs to train our model. This is done by a Tokenizer, which tokenizes the inputs (including converting the tokens to their corresponding IDs in the pre-trained vocabulary). if you want to learn more about this, out <a href="https://huggingface.co/course/chapter6/1?fw=pt" rel="nofollow">chapter 6</a> of the <a href="https://huggingface.co/course/chapter1/1" rel="nofollow">Hugging Face Course</a>.',Pt,q,Re="In order to avoid graph recompilation, inputs should have a fixed shape. We need to truncate or pad all samples to the same length.",qt,D,Dt,K,Kt,O,He='We can use the <strong><a href="https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.Trainer" rel="nofollow">Trainer</a></strong> and <strong><a href="https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a></strong> to fine-tune PyTorch-based transformer models.',Ot,tt,Qe='We prepared a simple <a href="https://github.com/huggingface/optimum-neuron/blob/main/notebooks/text-classification/scripts/train.py" rel="nofollow">train.py</a> training script to perform training and evaluation on the dataset. Below is an excerpt:',te,et,ee,lt,Xe='We can load the training script into our environment using the <code>wget</code> command or manually copy it into the notebook from <a href="https://github.com/huggingface/optimum-neuron/blob/notebooks/text-classification/scripts/train.py" rel="nofollow">here</a>.',le,nt,ne,at,Ge="We will use <code>torchrun</code> to launch our training script on both neuron cores for distributed training, thus allowing data parallelism. <code>torchrun</code> is a tool that automatically distributes a PyTorch model across multiple accelerators. We can pass the number of accelerators as <code>nproc_per_node</code> arguments alongside our hyperparameters.",ae,st,Fe="We’ll use the following command to launch training:",se,it,ie,rt,ze="After compilation, it will only take few minutes to complete the training.",re,ot,oe,pt,Se="Last but not least, terminate the EC2 instance to avoid unnecessary charges. Looking at the price-performance, our training only costs <strong><code>20ct</code></strong> (<strong><code>1.34$/h * 0.13h = 0.18$</code></strong>)",pe,Mt,me;return h=new ct({props:{title:"Getting started with AWS Trainium and Hugging Face Transformers",local:"getting-started-with-aws-trainium-and-hugging-face-transformers",headingTag:"h1"}}),b=new ct({props:{title:"Quick intro: AWS Trainium",local:"quick-intro-aws-trainium",headingTag:"h2"}}),Z=new ct({props:{title:"1. Setup AWS environment",local:"1-setup-aws-environment",headingTag:"h2"}}),$=new T({props:{code:"UFVCTElDX0ROUyUzRCUyMiUyMiUyMCUyMyUyMElQJTIwYWRkcmVzcyUyQyUyMGUuZy4lMjBlYzItMy04MC0uLi4uJTBBS0VZX1BBVEglM0QlMjIlMjIlMjAlMjMlMjBsb2NhbCUyMHBhdGglMjB0byUyMGtleSUyQyUyMGUuZy4lMjBzc2glMkZ0cm4ucGVtJTBBJTBBc3NoJTIwLUwlMjA4MDgwJTNBbG9jYWxob3N0JTNBODA4MCUyMC1pJTIwJTI0JTdCS0VZX05BTUUlN0QucGVtJTIwdWJ1bnR1JTQwJTI0UFVCTElDX0ROUw==",highlighted:`PUBLIC_DNS=<span class="hljs-string">""</span> <span class="hljs-comment"># IP address, e.g. ec2-3-80-....</span>
	KEY_PATH=<span class="hljs-string">""</span> <span class="hljs-comment"># local path to key, e.g. ssh/trn.pem</span>

	ssh -L 8080:localhost:8080 -i <span class="hljs-variable">\${KEY_NAME}</span>.pem ubuntu@<span class="hljs-variable">$PUBLIC_DNS</span>`,wrap:!1}}),x=new T({props:{code:"cHl0aG9uJTIwLW0lMjBwaXAlMjBpbnN0YWxsJTIwLiU1QnRyYWluaW5nJTVE",highlighted:"python -m pip install .[training]",wrap:!1}}),H=new T({props:{code:"cHl0aG9uJTIwLW0lMjBub3RlYm9vayUyMC0tYWxsb3ctcm9vdCUyMC0tcG9ydCUzRDgwODA=",highlighted:"python -m notebook --allow-root --port=8080",wrap:!1}}),S=new ct({props:{title:"2. Load and process the dataset",local:"2-load-and-process-the-dataset",headingTag:"h2"}}),L=new T({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBJTBBJTIzJTIwRGF0YXNldCUyMGlkJTIwZnJvbSUyMGh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldCUwQWRhdGFzZXRfaWQlMjAlM0QlMjAlMjJkYWlyLWFpJTJGZW1vdGlvbiUyMiUwQSUwQSUyMyUyMExvYWQlMjByYXclMjBkYXRhc2V0JTBBcmF3X2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoZGF0YXNldF9pZCklMEElMEFwcmludChmJTIyVHJhaW4lMjBkYXRhc2V0JTIwc2l6ZSUzQSUyMCU3QmxlbihyYXdfZGF0YXNldCU1Qid0cmFpbiclNUQpJTdEJTIyKSUwQXByaW50KGYlMjJUZXN0JTIwZGF0YXNldCUyMHNpemUlM0ElMjAlN0JsZW4ocmF3X2RhdGFzZXQlNUIndGVzdCclNUQpJTdEJTIyKSUwQSUwQSUyMyUyMFRyYWluJTIwZGF0YXNldCUyMHNpemUlM0ElMjAxNjAwMCUwQSUyMyUyMFRlc3QlMjBkYXRhc2V0JTIwc2l6ZSUzQSUyMDIwMDA=",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset


	<span class="hljs-comment"># Dataset id from huggingface.co/dataset</span>
	dataset_id = <span class="hljs-string">"dair-ai/emotion"</span>

	<span class="hljs-comment"># Load raw dataset</span>
	raw_dataset = load_dataset(dataset_id)

	<span class="hljs-built_in">print</span>(<span class="hljs-string">f"Train dataset size: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(raw_dataset[<span class="hljs-string">'train'</span>])}</span>"</span>)
	<span class="hljs-built_in">print</span>(<span class="hljs-string">f"Test dataset size: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(raw_dataset[<span class="hljs-string">'test'</span>])}</span>"</span>)

	<span class="hljs-comment"># Train dataset size: 16000</span>
	<span class="hljs-comment"># Test dataset size: 2000</span>`,wrap:!1}}),Y=new T({props:{code:"ZnJvbSUyMHJhbmRvbSUyMGltcG9ydCUyMHJhbmRyYW5nZSUwQSUwQSUwQXJhbmRvbV9pZCUyMCUzRCUyMHJhbmRyYW5nZShsZW4ocmF3X2RhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCkpJTBBcmF3X2RhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCU1QnJhbmRvbV9pZCU1RCUwQSUyMyUyMCU3Qid0ZXh0JyUzQSUyMCdpJTIwYWxzbyUyMGxpa2UlMjB0byUyMGxpc3RlbiUyMHRvJTIwamF6eiUyMHdoaWxzdCUyMHBhaW50aW5nJTIwaXQlMjBtYWtlcyUyMG1lJTIwZmVlbCUyMG1vcmUlMjBhcnRpc3RpYyUyMGFuZCUyMGFtYml0aW91cyUyMGFjdHVhbGx5JTIwbG9vayUyMHRvJTIwdGhlJTIwcmFpbmJvdyclMkMlMjAnbGFiZWwnJTNBJTIwMSU3RA==",highlighted:`<span class="hljs-keyword">from</span> random <span class="hljs-keyword">import</span> randrange


	random_id = randrange(<span class="hljs-built_in">len</span>(raw_dataset[<span class="hljs-string">"train"</span>]))
	raw_dataset[<span class="hljs-string">"train"</span>][random_id]
	<span class="hljs-comment"># {'text': 'i also like to listen to jazz whilst painting it makes me feel more artistic and ambitious actually look to the rainbow', 'label': 1}</span>`,wrap:!1}}),D=new T({props:{code:"aW1wb3J0JTIwb3MlMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUwQSUwQSUwQSUyMyUyME1vZGVsJTIwaWQlMjB0byUyMGxvYWQlMjB0aGUlMjB0b2tlbml6ZXIlMEFtb2RlbF9pZCUyMCUzRCUyMCUyMmJlcnQtYmFzZS11bmNhc2VkJTIyJTBBJTBBJTIzJTIwTG9hZCUyMFRva2VuaXplciUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkKSUwQSUwQSUwQSUyMyUyMFRva2VuaXplJTIwaGVscGVyJTIwZnVuY3Rpb24lMEFkZWYlMjB0b2tlbml6ZShiYXRjaCklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoYmF0Y2glNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwcGFkZGluZyUzRCUyMm1heF9sZW5ndGglMjIlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTBBZGVmJTIwdG9rZW5pemVfZnVuY3Rpb24oZXhhbXBsZSklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZXhhbXBsZSU1QiUyMnRleHQlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUwQSUyMyUyMFRva2VuaXplJTIwZGF0YXNldCUwQXRva2VuaXplZF9lbW90aW9ucyUyMCUzRCUyMHJhd19kYXRhc2V0Lm1hcCh0b2tlbml6ZSUyQyUyMGJhdGNoZWQlM0RUcnVlJTJDJTIwcmVtb3ZlX2NvbHVtbnMlM0QlNUIlMjJ0ZXh0JTIyJTVEKQ==",highlighted:`<span class="hljs-keyword">import</span> os

	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer


	<span class="hljs-comment"># Model id to load the tokenizer</span>
	model_id = <span class="hljs-string">"bert-base-uncased"</span>

	<span class="hljs-comment"># Load Tokenizer</span>
	tokenizer = AutoTokenizer.from_pretrained(model_id)


	<span class="hljs-comment"># Tokenize helper function</span>
	<span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize</span>(<span class="hljs-params">batch</span>):
	<span class="hljs-keyword">return</span> tokenizer(batch[<span class="hljs-string">"text"</span>], padding=<span class="hljs-string">"max_length"</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>)


	<span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_function</span>(<span class="hljs-params">example</span>):
	<span class="hljs-keyword">return</span> tokenizer(
	example[<span class="hljs-string">"text"</span>],
	padding=<span class="hljs-string">"max_length"</span>,
	truncation=<span class="hljs-literal">True</span>,
	)


	<span class="hljs-comment"># Tokenize dataset</span>
	tokenized_emotions = raw_dataset.<span class="hljs-built_in">map</span>(tokenize, batched=<span class="hljs-literal">True</span>, remove_columns=[<span class="hljs-string">"text"</span>])`,wrap:!1}}),K=new ct({props:{title:"3. Fine-tune BERT using Hugging Face Transformers",local:"3-fine-tune-bert-using-hugging-face-transformers",headingTag:"h2"}}),et=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRyYWluZXIlMkMlMjBUcmFpbmluZ0FyZ3VtZW50cyUwQSUwQWRlZiUyMHBhcnNlX2FyZ3MoKSUzQSUwQSUwOS4uLiUwQSUwQWRlZiUyMHRyYWluaW5nX2Z1bmN0aW9uKGFyZ3MpJTNBJTBBJTBBJTIwJTIwJTIwJTIwLi4uJTBBJTBBJTIwJTIwJTIwJTIwJTIzJTIwRG93bmxvYWQlMjB0aGUlMjBtb2RlbCUyMGZyb20lMjBodWdnaW5nZmFjZS5jbyUyRm1vZGVscyUwQSUyMCUyMCUyMCUyMG1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXJncy5tb2RlbF9pZCUyQyUyMG51bV9sYWJlbHMlM0RudW1fbGFiZWxzJTJDJTIwbGFiZWwyaWQlM0RsYWJlbDJpZCUyQyUyMGlkMmxhYmVsJTNEaWQybGFiZWwlMEElMjAlMjAlMjAlMjApJTBBJTBBJTIwJTIwJTIwJTIwdHJhaW5pbmdfYXJncyUyMCUzRCUyMFRyYWluaW5nQXJndW1lbnRzKCUwQSUwOSUwOSUwOS4uLiUwQSUyMCUyMCUyMCUyMCklMEElMEElMjAlMjAlMjAlMjAlMjMlMjBDcmVhdGUlMjBUcmFpbmVyJTIwaW5zdGFuY2UlMEElMjAlMjAlMjAlMjB0cmFpbmVyJTIwJTNEJTIwVHJhaW5lciglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXJncyUzRHRyYWluaW5nX2FyZ3MlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0cmFpbl9kYXRhc2V0JTNEdG9rZW5pemVkX2Vtb3Rpb25zJTVCJTIydHJhaW4lMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsX2RhdGFzZXQlM0R0b2tlbml6ZWRfZW1vdGlvbnMlNUIlMjJ2YWxpZGF0aW9uJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvY2Vzc2luZ19jbGFzcyUzRHRva2VuaXplciUyQyUwQSUyMCUyMCUyMCUyMCklMEElMEElMEElMjAlMjAlMjAlMjAlMjMlMjBTdGFydCUyMHRyYWluaW5nJTBBJTIwJTIwJTIwJTIwdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer, TrainingArguments

	<span class="hljs-keyword">def</span> <span class="hljs-title function_">parse_args</span>():
	...

	<span class="hljs-keyword">def</span> <span class="hljs-title function_">training_function</span>(<span class="hljs-params">args</span>):

	...

	<span class="hljs-comment"># Download the model from huggingface.co/models</span>
	model = AutoModelForSequenceClassification.from_pretrained(
	args.model_id, num_labels=num_labels, label2id=label2id, id2label=id2label
	)

	training_args = TrainingArguments(
	...
	)

	<span class="hljs-comment"># Create Trainer instance</span>
	trainer = Trainer(
	model=model,
	args=training_args,
	train_dataset=tokenized_emotions[<span class="hljs-string">"train"</span>],
	eval_dataset=tokenized_emotions[<span class="hljs-string">"validation"</span>],
	processing_class=tokenizer,
	)


	<span class="hljs-comment"># Start training</span>
	trainer.train()`,wrap:!1}}),nt=new T({props:{code:"IXdnZXQlMjBodHRwcyUzQSUyRiUyRnJhdy5naXRodWJ1c2VyY29udGVudC5jb20lMkZodWdnaW5nZmFjZSUyRm9wdGltdW0tbmV1cm9uJTJGbWFpbiUyRm5vdGVib29rcyUyRnRleHQtY2xhc3NpZmljYXRpb24lMkZzY3JpcHRzJTJGdHJhaW4ucHk=",highlighted:"!wget https://raw.githubusercontent.com/huggingface/optimum-neuron/main/notebooks/text-classification/scripts/train.py",wrap:!1}}),it=new T({props:{code:"IXRvcmNocnVuJTIwLS1ucHJvY19wZXJfbm9kZSUzRDIlMjB0cmFpbi5weSUyMCU1QyUwQSUyMC0tbW9kZWxfaWQlMjBiZXJ0LWJhc2UtdW5jYXNlZCUyMCU1QyUwQSUyMC0tbHIlMjA1ZS01JTIwJTVDJTBBJTIwLS1wZXJfZGV2aWNlX3RyYWluX2JhdGNoX3NpemUlMjA4JTIwJTVDJTBBJTIwLS1iZjE2JTIwVHJ1ZSUyMCU1QyUwQSUyMC0tZXBvY2hzJTIwMw==",highlighted:`!torchrun --nproc_per_node=<span class="hljs-number">2</span> train.py \\
	--model_id bert-base-uncased \\
	--lr <span class="hljs-number">5e-5</span> \\
	--per_device_train_batch_size <span class="hljs-number">8</span> \\
	--bf16 <span class="hljs-literal">True</span> \\
	--epochs <span class="hljs-number">3</span>`,wrap:!1}}),ot=new T({props:{code:"KioqKiolMjB0cmFpbiUyMG1ldHJpY3MlMjAqKioqKiUwQSUyMCUyMGVwb2NoJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTNEJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwMy4wJTBBJTIwJTIwZXZhbF9sb3NzJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTNEJTIwJTIwJTIwJTIwJTIwMC4xNzYxJTBBJTIwJTIwZXZhbF9ydW50aW1lJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTNEJTIwMCUzQTAwJTNBMDMuNzMlMEElMjAlMjBldmFsX3NhbXBsZXNfcGVyX3NlY29uZCUyMCUyMCUzRCUyMCUyMCUyMCUyMDI2Ny45NTYlMEElMjAlMjBldmFsX3N0ZXBzX3Blcl9zZWNvbmQlMjAlMjAlMjAlMjAlM0QlMjAlMjAlMjAlMjAlMjAxNi44ODElMEElMjAlMjB0b3RhbF9mbG9zJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTNEJTIwJTIwMTQ3MDMwMEdGJTBBJTIwJTIwdHJhaW5fbG9zcyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUzRCUyMCUyMCUyMCUyMCUyMDAuMjAyNCUwQSUyMCUyMHRyYWluX3J1bnRpbWUlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlM0QlMjAwJTNBMDclM0EyNy4xNCUwQSUyMCUyMHRyYWluX3NhbXBsZXNfcGVyX3NlY29uZCUyMCUzRCUyMCUyMCUyMCUyMCUyMDUzLjY3NCUwQSUyMCUyMHRyYWluX3N0ZXBzX3Blcl9zZWNvbmQlMjAlMjAlMjAlM0QlMjAlMjAlMjAlMjAlMjAlMjA2LjcwOSUwQQ==",highlighted:`*** train metrics ***
	epoch = <span class="hljs-number">3.0</span>
	eval_loss = <span class="hljs-number">0.1761</span>
	eval_runtime = <span class="hljs-number">0</span>:<span class="hljs-number">00</span>:<span class="hljs-number">03.73</span>
	eval_samples_per_second = <span class="hljs-number">267.956</span>
	eval_steps_per_second = <span class="hljs-number">16.881</span>
	total_flos = 1470300GF
	train_loss = <span class="hljs-number">0.2024</span>
	train_runtime = <span class="hljs-number">0</span>:07:<span class="hljs-number">27.14</span>
	train_samples_per_second = <span class="hljs-number">53.674</span>
	train_steps_per_second = <span class="hljs-number">6.709</span>
	`,wrap:!1}}),{c(){d=i("meta"),ut=a(),mt=i("p"),wt=a(),p(h.$$.fragment),Tt=a(),J=i("p"),J.innerHTML=ce,dt=a(),y=i("p"),y.innerHTML=ue,ht=a(),f=i("p"),f.textContent=we,Jt=a(),U=i("ol"),U.innerHTML=Te,yt=a(),j=i("p"),j.innerHTML=de,ft=a(),p(b.$$.fragment),Ut=a(),g=i("p"),g.innerHTML=he,jt=a(),I=i("p"),I.textContent=Je,bt=a(),C=i("p"),C.innerHTML=ye,gt=a(),v=i("table"),v.innerHTML=fe,It=a(),Ct=i("hr"),vt=a(),B=i("p"),B.textContent=Ue,Bt=a(),W=i("p"),W.innerHTML=je,Wt=a(),p(Z.$$.fragment),Zt=a(),_=i("p"),_.innerHTML=be,_t=a(),A=i("p"),A.innerHTML=ge,At=a(),p($.$$.fragment),$t=a(),k=i("p"),k.innerHTML=Ie,kt=a(),p(x.$$.fragment),xt=a(),R=i("p"),R.innerHTML=Ce,Rt=a(),p(H.$$.fragment),Ht=a(),Q=i("p"),Q.innerHTML=ve,Qt=a(),X=i("p"),X.innerHTML=Be,Xt=a(),G=i("p"),G.innerHTML=We,Gt=a(),F=i("p"),F.innerHTML=Ze,Ft=a(),z=i("p"),z.innerHTML=_e,zt=a(),p(S.$$.fragment),St=a(),N=i("p"),N.innerHTML=Ae,Nt=a(),E=i("p"),E.innerHTML=$e,Et=a(),p(L.$$.fragment),Lt=a(),V=i("p"),V.textContent=ke,Vt=a(),p(Y.$$.fragment),Yt=a(),P=i("p"),P.innerHTML=xe,Pt=a(),q=i("p"),q.textContent=Re,qt=a(),p(D.$$.fragment),Dt=a(),p(K.$$.fragment),Kt=a(),O=i("p"),O.innerHTML=He,Ot=a(),tt=i("p"),tt.innerHTML=Qe,te=a(),p(et.$$.fragment),ee=a(),lt=i("p"),lt.innerHTML=Xe,le=a(),p(nt.$$.fragment),ne=a(),at=i("p"),at.innerHTML=Ge,ae=a(),st=i("p"),st.textContent=Fe,se=a(),p(it.$$.fragment),ie=a(),rt=i("p"),rt.textContent=ze,re=a(),p(ot.$$.fragment),oe=a(),pt=i("p"),pt.innerHTML=Se,pe=a(),Mt=i("p"),this.h()},l(t){const e=De("svelte-u9bgzb",document.head);d=r(e,"META",{name:!0,content:!0}),e.forEach(l),ut=s(t),mt=r(t,"P",{}),Ne(mt).forEach(l),wt=s(t),m(h.$$.fragment,t),Tt=s(t),J=r(t,"P",{"data-svelte-h":!0}),o(J)!=="svelte-1scc2v9"&&(J.innerHTML=ce),dt=s(t),y=r(t,"P",{"data-svelte-h":!0}),o(y)!=="svelte-13ajvys"&&(y.innerHTML=ue),ht=s(t),f=r(t,"P",{"data-svelte-h":!0}),o(f)!=="svelte-1hahfn0"&&(f.textContent=we),Jt=s(t),U=r(t,"OL",{"data-svelte-h":!0}),o(U)!=="svelte-5cl44w"&&(U.innerHTML=Te),yt=s(t),j=r(t,"P",{"data-svelte-h":!0}),o(j)!=="svelte-76ulb8"&&(j.innerHTML=de),ft=s(t),m(b.$$.fragment,t),Ut=s(t),g=r(t,"P",{"data-svelte-h":!0}),o(g)!=="svelte-i1bq8"&&(g.innerHTML=he),jt=s(t),I=r(t,"P",{"data-svelte-h":!0}),o(I)!=="svelte-1xmmjdv"&&(I.textContent=Je),bt=s(t),C=r(t,"P",{"data-svelte-h":!0}),o(C)!=="svelte-3q7kol"&&(C.innerHTML=ye),gt=s(t),v=r(t,"TABLE",{"data-svelte-h":!0}),o(v)!=="svelte-1ch8aud"&&(v.innerHTML=fe),It=s(t),Ct=r(t,"HR",{}),vt=s(t),B=r(t,"P",{"data-svelte-h":!0}),o(B)!=="svelte-6n93f4"&&(B.textContent=Ue),Bt=s(t),W=r(t,"P",{"data-svelte-h":!0}),o(W)!=="svelte-vmjcui"&&(W.innerHTML=je),Wt=s(t),m(Z.$$.fragment,t),Zt=s(t),_=r(t,"P",{"data-svelte-h":!0}),o(_)!=="svelte-gaivyz"&&(_.innerHTML=be),_t=s(t),A=r(t,"P",{"data-svelte-h":!0}),o(A)!=="svelte-grkdd"&&(A.innerHTML=ge),At=s(t),m($.$$.fragment,t),$t=s(t),k=r(t,"P",{"data-svelte-h":!0}),o(k)!=="svelte-62zkdx"&&(k.innerHTML=Ie),kt=s(t),m(x.$$.fragment,t),xt=s(t),R=r(t,"P",{"data-svelte-h":!0}),o(R)!=="svelte-li4i9y"&&(R.innerHTML=Ce),Rt=s(t),m(H.$$.fragment,t),Ht=s(t),Q=r(t,"P",{"data-svelte-h":!0}),o(Q)!=="svelte-cqxx2g"&&(Q.innerHTML=ve),Qt=s(t),X=r(t,"P",{"data-svelte-h":!0}),o(X)!=="svelte-7s5jat"&&(X.innerHTML=Be),Xt=s(t),G=r(t,"P",{"data-svelte-h":!0}),o(G)!=="svelte-1eg2vf7"&&(G.innerHTML=We),Gt=s(t),F=r(t,"P",{"data-svelte-h":!0}),o(F)!=="svelte-krn90s"&&(F.innerHTML=Ze),Ft=s(t),z=r(t,"P",{"data-svelte-h":!0}),o(z)!=="svelte-7mue0b"&&(z.innerHTML=_e),zt=s(t),m(S.$$.fragment,t),St=s(t),N=r(t,"P",{"data-svelte-h":!0}),o(N)!=="svelte-s22o2o"&&(N.innerHTML=Ae),Nt=s(t),E=r(t,"P",{"data-svelte-h":!0}),o(E)!=="svelte-r45iol"&&(E.innerHTML=$e),Et=s(t),m(L.$$.fragment,t),Lt=s(t),V=r(t,"P",{"data-svelte-h":!0}),o(V)!=="svelte-udg7sq"&&(V.textContent=ke),Vt=s(t),m(Y.$$.fragment,t),Yt=s(t),P=r(t,"P",{"data-svelte-h":!0}),o(P)!=="svelte-1g378vw"&&(P.innerHTML=xe),Pt=s(t),q=r(t,"P",{"data-svelte-h":!0}),o(q)!=="svelte-7q5224"&&(q.textContent=Re),qt=s(t),m(D.$$.fragment,t),Dt=s(t),m(K.$$.fragment,t),Kt=s(t),O=r(t,"P",{"data-svelte-h":!0}),o(O)!=="svelte-zqiyd3"&&(O.innerHTML=He),Ot=s(t),tt=r(t,"P",{"data-svelte-h":!0}),o(tt)!=="svelte-uu3ei2"&&(tt.innerHTML=Qe),te=s(t),m(et.$$.fragment,t),ee=s(t),lt=r(t,"P",{"data-svelte-h":!0}),o(lt)!=="svelte-z4rrh7"&&(lt.innerHTML=Xe),le=s(t),m(nt.$$.fragment,t),ne=s(t),at=r(t,"P",{"data-svelte-h":!0}),o(at)!=="svelte-1m1sj0b"&&(at.innerHTML=Ge),ae=s(t),st=r(t,"P",{"data-svelte-h":!0}),o(st)!=="svelte-16f8n49"&&(st.textContent=Fe),se=s(t),m(it.$$.fragment,t),ie=s(t),rt=r(t,"P",{"data-svelte-h":!0}),o(rt)!=="svelte-1d72yub"&&(rt.textContent=ze),re=s(t),m(ot.$$.fragment,t),oe=s(t),pt=r(t,"P",{"data-svelte-h":!0}),o(pt)!=="svelte-fq7zol"&&(pt.innerHTML=Se),pe=s(t),Mt=r(t,"P",{}),Ne(Mt).forEach(l),this.h()},h(){Ee(d,"name","hf:doc:metadata"),Ee(d,"content",tl)},m(t,e){Ke(document.head,d),n(t,ut,e),n(t,mt,e),n(t,wt,e),M(h,t,e),n(t,Tt,e),n(t,J,e),n(t,dt,e),n(t,y,e),n(t,ht,e),n(t,f,e),n(t,Jt,e),n(t,U,e),n(t,yt,e),n(t,j,e),n(t,ft,e),M(b,t,e),n(t,Ut,e),n(t,g,e),n(t,jt,e),n(t,I,e),n(t,bt,e),n(t,C,e),n(t,gt,e),n(t,v,e),n(t,It,e),n(t,Ct,e),n(t,vt,e),n(t,B,e),n(t,Bt,e),n(t,W,e),n(t,Wt,e),M(Z,t,e),n(t,Zt,e),n(t,_,e),n(t,_t,e),n(t,A,e),n(t,At,e),M($,t,e),n(t,$t,e),n(t,k,e),n(t,kt,e),M(x,t,e),n(t,xt,e),n(t,R,e),n(t,Rt,e),M(H,t,e),n(t,Ht,e),n(t,Q,e),n(t,Qt,e),n(t,X,e),n(t,Xt,e),n(t,G,e),n(t,Gt,e),n(t,F,e),n(t,Ft,e),n(t,z,e),n(t,zt,e),M(S,t,e),n(t,St,e),n(t,N,e),n(t,Nt,e),n(t,E,e),n(t,Et,e),M(L,t,e),n(t,Lt,e),n(t,V,e),n(t,Vt,e),M(Y,t,e),n(t,Yt,e),n(t,P,e),n(t,Pt,e),n(t,q,e),n(t,qt,e),M(D,t,e),n(t,Dt,e),M(K,t,e),n(t,Kt,e),n(t,O,e),n(t,Ot,e),n(t,tt,e),n(t,te,e),M(et,t,e),n(t,ee,e),n(t,lt,e),n(t,le,e),M(nt,t,e),n(t,ne,e),n(t,at,e),n(t,ae,e),n(t,st,e),n(t,se,e),M(it,t,e),n(t,ie,e),n(t,rt,e),n(t,re,e),M(ot,t,e),n(t,oe,e),n(t,pt,e),n(t,pe,e),n(t,Mt,e),me=!0},p:Ve,i(t){me\|\|(c(h.$$.fragment,t),c(b.$$.fragment,t),c(Z.$$.fragment,t),c($.$$.fragment,t),c(x.$$.fragment,t),c(H.$$.fragment,t),c(S.$$.fragment,t),c(L.$$.fragment,t),c(Y.$$.fragment,t),c(D.$$.fragment,t),c(K.$$.fragment,t),c(et.$$.fragment,t),c(nt.$$.fragment,t),c(it.$$.fragment,t),c(ot.$$.fragment,t),me=!0)},o(t){u(h.$$.fragment,t),u(b.$$.fragment,t),u(Z.$$.fragment,t),u($.$$.fragment,t),u(x.$$.fragment,t),u(H.$$.fragment,t),u(S.$$.fragment,t),u(L.$$.fragment,t),u(Y.$$.fragment,t),u(D.$$.fragment,t),u(K.$$.fragment,t),u(et.$$.fragment,t),u(nt.$$.fragment,t),u(it.$$.fragment,t),u(ot.$$.fragment,t),me=!1},d(t){t&&(l(ut),l(mt),l(wt),l(Tt),l(J),l(dt),l(y),l(ht),l(f),l(Jt),l(U),l(yt),l(j),l(ft),l(Ut),l(g),l(jt),l(I),l(bt),l(C),l(gt),l(v),l(It),l(Ct),l(vt),l(B),l(Bt),l(W),l(Wt),l(Zt),l(_),l(_t),l(A),l(At),l($t),l(k),l(kt),l(xt),l(R),l(Rt),l(Ht),l(Q),l(Qt),l(X),l(Xt),l(G),l(Gt),l(F),l(Ft),l(z),l(zt),l(St),l(N),l(Nt),l(E),l(Et),l(Lt),l(V),l(Vt),l(Yt),l(P),l(Pt),l(q),l(qt),l(Dt),l(Kt),l(O),l(Ot),l(tt),l(te),l(ee),l(lt),l(le),l(ne),l(at),l(ae),l(st),l(se),l(ie),l(rt),l(re),l(oe),l(pt),l(pe),l(Mt)),l(d),w(h,t),w(b,t),w(Z,t),w($,t),w(x,t),w(H,t),w(S,t),w(L,t),w(Y,t),w(D,t),w(K,t),w(et,t),w(nt,t),w(it,t),w(ot,t)}}}const tl='{"title":"Getting started with AWS Trainium and Hugging Face Transformers","local":"getting-started-with-aws-trainium-and-hugging-face-transformers","sections":[{"title":"Quick intro: AWS Trainium","local":"quick-intro-aws-trainium","sections":[],"depth":2},{"title":"1. Setup AWS environment","local":"1-setup-aws-environment","sections":[],"depth":2},{"title":"2. Load and process the dataset","local":"2-load-and-process-the-dataset","sections":[],"depth":2},{"title":"3. Fine-tune BERT using Hugging Face Transformers","local":"3-fine-tune-bert-using-hugging-face-transformers","sections":[],"depth":2}],"depth":1}';function el(Me){return Ye(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class il extends Pe{constructor(d){super(),qe(this,d,el,Oe,Le,{})}}export{il as component};

Xet Storage Details

Size:: 28.1 kB
Xet hash:: 7e57d0a10624198d7271c7bee4d02ae8f3c3de7a0d4ffb40072d66a6c338566b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.