Buckets:
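| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Prompt engineering&quot;,&quot;local&quot;:&quot;prompt-engineering&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Best practices&quot;,&quot;local&quot;:&quot;best-practices&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Techniques&quot;,&quot;local&quot;:&quot;techniques&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Few-shot prompting&quot;,&quot;local&quot;:&quot;few-shot-prompting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Chain-of-thought&quot;,&quot;local&quot;:&quot;chain-of-thought&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Fine-tuning&quot;,&quot;local&quot;:&quot;fine-tuning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Examples&quot;,&quot;local&quot;:&quot;examples&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |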
| <link href="/docs/transformers/pr_33892/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/scheduler.31fdf58d.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/singletons.9860629f.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.252883d5.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/paths.e85c0ec8.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/preload-helper.40847a0e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/index.2f76fdf0.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/0.ca4aafa4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/nodes/568.467547f7.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CopyLLMTxtMenu.ff482081.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.71f274cc.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/IconCopy.ac192424.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/CodeBlock.ab12f8e1.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/DocNotebookDropdown.dd28433e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33892/en/_app/immutable/chunks/HfOption.fb051768.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Prompt engineering&quot;,&quot;local&quot;:&quot;prompt-engineering&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Best practices&quot;,&quot;local&quot;:&quot;best-practices&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Techniques&quot;,&quot;local&quot;:&quot;techniques&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Few-shot prompting&quot;,&quot;local&quot;:&quot;few-shot-prompting&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Chain-of-thought&quot;,&quot;local&quot;:&quot;chain-of-thought&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Fine-tuning&quot;,&quot;local&quot;:&quot;fine-tuning&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Examples&quot;,&quot;local&quot;:&quot;examples&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center 
justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="prompt-engineering" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-engineering"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 
0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt engineering</span></h1> <p data-svelte-h="svelte-1l36ky5">Prompt engineering, or prompting, uses natural language to improve large language model (LLM) performance on a variety of tasks. A prompt can steer the model towards generating a desired output. In many cases, you don’t even need a <a href="#fine-tuning">fine-tuned</a> model for a task. You just need a good prompt.</p> <p data-svelte-h="svelte-ifrhpa">Try prompting an LLM to classify some text. When you create a prompt, it’s important to provide very specific instructions about the task and what the result should look like.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre 
class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = pipeline(task=<span class="hljs-string">"text-generation"</span>, model=<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>) | |
| prompt = <span class="hljs-string">"""Classify the text into neutral, negative or positive. | |
| Text: This movie is definitely one of my favorite movies of its kind. The interaction between respectable and morally strong characters is an ode to chivalry and the honor code amongst thieves and policemen. | |
| Sentiment: | |
| """</span> | |
| outputs = pipeline(prompt, max_new_tokens=<span class="hljs-number">10</span>) | |
| <span class="hljs-keyword">for</span> output <span class="hljs-keyword">in</span> outputs: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Result: <span class="hljs-subst">{output[<span class="hljs-string">'generated_text'</span>]}</span>"</span>) | |
| Result: Classify the text into neutral, negative <span class="hljs-keyword">or</span> positive. | |
| Text: This movie <span class="hljs-keyword">is</span> definitely one of my favorite movies of its kind. The interaction between respectable <span class="hljs-keyword">and</span> morally strong characters <span class="hljs-keyword">is</span> an ode to chivalry <span class="hljs-keyword">and</span> the honor code amongst thieves <span class="hljs-keyword">and</span> policemen. | |
| Sentiment: | |
| Positive<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t9p3a6">The challenge lies in designing prompts that produce the results you’re expecting because language is so incredibly nuanced and expressive.</p> <p data-svelte-h="svelte-13k67mk">This guide covers prompt engineering best practices, techniques, and examples for how to solve language and reasoning tasks.</p> <h2 class="relative group"><a id="best-practices" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#best-practices"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Best practices</span></h2> <ol data-svelte-h="svelte-6yg2m"><li><p>Try to pick the latest models for the best performance. Keep in mind that LLMs can come in two variants, <a href="https://hf.co/mistralai/Mistral-7B-v0.1" rel="nofollow">base</a> and <a href="https://hf.co/mistralai/Mistral-7B-Instruct-v0.1" rel="nofollow">instruction-tuned</a> (or chat).</p> <p>Base models are excellent at completing text given an initial prompt, but they’re not as good at following instructions. 
Instruction-tuned models are versions of the base models that have been further trained on instructional or conversational data. This makes instruction-tuned models a better fit for prompting.</p> <blockquote class="warning"><p>Modern LLMs are typically decoder-only models, but there are some encoder-decoder LLMs like <a href="../model_doc/flan-t5">Flan-T5</a> or <a href="../model_doc/bart">BART</a> that may be used for prompting. For encoder-decoder models, make sure you set the pipeline task identifier to <code>text2text-generation</code> instead of <code>text-generation</code>.</p></blockquote></li> <li><p>Start with a short and simple prompt, and iterate on it to get better results.</p></li> <li><p>Put instructions at the beginning or end of a prompt. For longer prompts, models may apply optimizations to prevent attention from scaling quadratically, which places more emphasis at the beginning and end of a prompt.</p></li> <li><p>Clearly separate instructions from the text of interest.</p></li> <li><p>Be specific and descriptive about the task and the desired output, including, for example, its format, length, style, and language. 
Avoid ambiguous descriptions and instructions.</p></li> <li><p>Instructions should focus on “what to do” rather than “what not to do”.</p></li> <li><p>Lead the model to generate the correct output by writing the first word or even the first sentence.</p></li> <li><p>Try other techniques like <a href="#few-shot-prompting">few-shot</a> and <a href="#chain-of-thought">chain-of-thought</a> to improve results.</p></li> <li><p>Test your prompts with different models to assess their robustness.</p></li> <li><p>Version and track your prompt performance.</p></li></ol> <h2 class="relative group"><a id="techniques" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#techniques"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Techniques</span></h2> <p data-svelte-h="svelte-m5bobg">Crafting a good prompt alone, also known as zero-shot prompting, may not be enough to get the results you want. 
You may need to try a few prompting techniques to get the best performance.</p> <p data-svelte-h="svelte-xs9n05">This section covers a few prompting techniques.</p> <h3 class="relative group"><a id="few-shot-prompting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#few-shot-prompting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Few-shot prompting</span></h3> <p data-svelte-h="svelte-1osuclx">Few-shot prompting improves accuracy and performance by including specific examples of what a model should generate given an input. The explicit examples give the model a better understanding of the task and the output format you’re looking for. Try experimenting with different numbers of examples (2, 4, 8, etc.) to see how it affects performance. 
The example below provides the model with 1 example (1-shot) of the output format (a date in MM/DD/YYYY format) it should return.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = pipeline(model=<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>) | |
| prompt = <span class="hljs-string">"""Text: The first human went into space and orbited the Earth on April 12, 1961. | |
| Date: 04/12/1961 | |
| Text: The first-ever televised presidential debate in the United States took place on September 28, 1960, between presidential candidates John F. Kennedy and Richard Nixon. | |
| Date:"""</span> | |
| outputs = pipeline(prompt, max_new_tokens=<span class="hljs-number">12</span>, do_sample=<span class="hljs-literal">True</span>, top_k=<span class="hljs-number">10</span>) | |
| <span class="hljs-keyword">for</span> output <span class="hljs-keyword">in</span> outputs: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Result: <span class="hljs-subst">{output[<span class="hljs-string">'generated_text'</span>]}</span>"</span>) | |
| <span class="hljs-comment"># Result: Text: The first human went into space and orbited the Earth on April 12, 1961.</span> | |
| <span class="hljs-comment"># Date: 04/12/1961</span> | |
| <span class="hljs-comment"># Text: The first-ever televised presidential debate in the United States took place on September 28, 1960, between presidential candidates John F. Kennedy and Richard Nixon.</span> | |
| <span class="hljs-comment"># Date: 09/28/1960</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15zvrzz">The downside of few-shot prompting is that you need to create lengthier prompts which increases computation and latency. There is also a limit to prompt lengths. Finally, a model can learn unintended patterns from your examples, and it may not work well on complex reasoning tasks.</p> <p data-svelte-h="svelte-cmca93">To improve few-shot prompting for modern instruction-tuned LLMs, use a model’s specific <a href="../conversations">chat template</a>. These models are trained on datasets with turn-based conversations between a “user” and “assistant”. Structuring your prompt to align with this can improve performance.</p> <p data-svelte-h="svelte-18odvrx">Structure your prompt as a turn-based conversation and use the <code>apply_chat_template</code> method to tokenize and format it.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = pipeline(model=<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>) | |
| messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Text: The first human went into space and orbited the Earth on April 12, 1961."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Date: 04/12/1961"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Text: The first-ever televised presidential debate in the United States took place on September 28, 1960, between presidential candidates John F. Kennedy and Richard Nixon."</span>} | |
| ] | |
| prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">True</span>) | |
| outputs = pipeline(prompt, max_new_tokens=<span class="hljs-number">12</span>, do_sample=<span class="hljs-literal">True</span>, top_k=<span class="hljs-number">10</span>) | |
| <span class="hljs-keyword">for</span> output <span class="hljs-keyword">in</span> outputs: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Result: <span class="hljs-subst">{output[<span class="hljs-string">'generated_text'</span>]}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3kt6r9">While the basic few-shot prompting approach embedded examples within a single text string, the chat template format offers the following benefits.</p> <ul data-svelte-h="svelte-mtzqrn"><li>The model may have a potentially improved understanding because it can better recognize the pattern and the expected roles of user input and assistant output.</li> <li>The model may more consistently output the desired output format because it is structured like its input during training.</li></ul> <p data-svelte-h="svelte-11gqod1">Always consult a specific instruction-tuned model’s documentation to learn more about the format of their chat template so that you can structure your few-shot prompts accordingly.</p> <h3 class="relative group"><a id="chain-of-thought" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chain-of-thought"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Chain-of-thought</span></h3> <p data-svelte-h="svelte-1va4f85">Chain-of-thought (CoT) is effective at generating more coherent and well-reasoned outputs by providing a series of prompts that help a model “think” more thoroughly about a topic.</p> <p data-svelte-h="svelte-1314x5c">The example below provides the model with several prompts to work through intermediate reasoning steps.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = pipeline(model=<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>) | |
| prompt = <span class="hljs-string">"""Let's go through this step-by-step: | |
| 1. You start with 15 muffins. | |
| 2. You eat 2 muffins, leaving you with 13 muffins. | |
| 3. You give 5 muffins to your neighbor, leaving you with 8 muffins. | |
| 4. Your partner buys 6 more muffins, bringing the total number of muffins to 14. | |
| 5. Your partner eats 2 muffins, leaving you with 12 muffins. | |
| If you eat 6 muffins, how many are left?"""</span> | |
| outputs = pipeline(prompt, max_new_tokens=<span class="hljs-number">20</span>, do_sample=<span class="hljs-literal">True</span>, top_k=<span class="hljs-number">10</span>) | |
| <span class="hljs-keyword">for</span> output <span class="hljs-keyword">in</span> outputs: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Result: <span class="hljs-subst">{output[<span class="hljs-string">'generated_text'</span>]}</span>"</span>) | |
| Result: Let<span class="hljs-string">'s go through this step-by-step: | |
| 1. You start with 15 muffins. | |
| 2. You eat 2 muffins, leaving you with 13 muffins. | |
| 3. You give 5 muffins to your neighbor, leaving you with 8 muffins. | |
| 4. Your partner buys 6 more muffins, bringing the total number of muffins to 14. | |
| 5. Your partner eats 2 muffins, leaving you with 12 muffins. | |
| If you eat 6 muffins, how many are left? | |
| Answer: 6</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1oqty7r">Like <a href="#few-shot-prompting">few-shot</a> prompting, the downside of CoT is that it requires more effort to design a series of prompts that help the model reason through a complex task, and the longer prompt increases latency.</p> <h2 class="relative group"><a id="fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fine-tuning</span></h2> <p data-svelte-h="svelte-164c0yp">While prompting is a powerful way to work with LLMs, there are scenarios where a fine-tuned model or even fine-tuning a model works better.</p> <p data-svelte-h="svelte-1vq8s8p">Here are some example scenarios where a fine-tuned model makes sense.</p> <ul data-svelte-h="svelte-16pui82"><li>Your domain is extremely different from what an LLM was pretrained on, and extensive prompting didn’t produce the results you want.</li> <li>Your model needs to work well in a low-resource language.</li> <li>Your model needs to be trained on sensitive data that have strict regulatory requirements.</li> <li>You’re using a small 
model due to cost, privacy, infrastructure, or other constraints.</li></ul> <p data-svelte-h="svelte-1ms6wkd">In all of these scenarios, ensure that you have a large enough domain-specific dataset to train your model with, have enough time and resources, and the cost of fine-tuning is worth it. Otherwise, you may be better off trying to optimize your prompt.</p> <h2 class="relative group"><a id="examples" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#examples"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Examples</span></h2> <p data-svelte-h="svelte-jdpxp7">The examples below demonstrate prompting a LLM for different tasks.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">named entity recognition </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">translation </div><div class="flex items-center 
border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">summarization </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">question answering </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline = pipeline(model=<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>, dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>) | |
| prompt = <span class="hljs-string">"""Return a list of named entities in the text. | |
| Text: The company was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf in New York City, originally as a company that developed a chatbot app targeted at teenagers. | |
| Named entities: | |
| """</span> | |
| outputs = pipeline(prompt, max_new_tokens=<span class="hljs-number">50</span>, return_full_text=<span class="hljs-literal">False</span>) | |
| <span class="hljs-keyword">for</span> output <span class="hljs-keyword">in</span> outputs: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Result: <span class="hljs-subst">{output[<span class="hljs-string">'generated_text'</span>]}</span>"</span>) | |
| Result: [Clément Delangue, Julien Chaumond, Thomas Wolf, company, New York City, chatbot app, teenagers]<!-- HTML_TAG_END --></pre></div> </div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/tasks/prompting.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_16tnnm8 = { | |
| assets: "/docs/transformers/pr_33892/en", | |
| base: "/docs/transformers/pr_33892/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers/pr_33892/en/_app/immutable/entry/start.b2c4257a.js"), | |
| import("/docs/transformers/pr_33892/en/_app/immutable/entry/app.05ef1f97.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 568], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 37.6 kB
- Xet hash:
- e29612343738883941eefb51e950deccfd0b3f1f7231533c0f13e44f57e493ca
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.