Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1007 /en /chapter1 /2.html

rtrm

3 months ago

download

raw

15.7 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Natural Language Processing and Large Language Models","local":"natural-language-processing-and-large-language-models","sections":[{"title":"What is NLP?","local":"what-is-nlp","sections":[],"depth":2},{"title":"The Rise of Large Language Models (LLMs)","local":"rise-of-llms","sections":[],"depth":2},{"title":"Why is language processing challenging?","local":"why-is-it-challenging","sections":[],"depth":2}],"depth":1}">
	<link href="/docs/course/pr_1007/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/entry/start.3e9ba80c.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/scheduler.37c15a92.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/singletons.53a27d8c.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/index.18351ede.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/paths.8363c544.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/entry/app.02fda992.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/index.7cb9c9b8.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/nodes/0.865283b9.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/nodes/6.4c5d0306.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/Tip.d10b3fc9.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/Youtube.8666c400.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/CourseFloatingBanner.df82c153.js">
	<link rel="modulepreload" href="/docs/course/pr_1007/en/_app/immutable/chunks/getInferenceSnippets.b6a8c7d2.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Natural Language Processing and Large Language Models","local":"natural-language-processing-and-large-language-models","sections":[{"title":"What is NLP?","local":"what-is-nlp","sections":[],"depth":2},{"title":"The Rise of Large Language Models (LLMs)","local":"rise-of-llms","sections":[],"depth":2},{"title":"Why is language processing challenging?","local":"why-is-it-challenging","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="natural-language-processing-and-large-language-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#natural-language-processing-and-large-language-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Natural Language Processing and Large Language Models</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-1-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> </div> <p data-svelte-h="svelte-tmivth">Before jumping into Transformer models, let’s do a quick overview of what natural language processing is, how large language models have transformed the field, and why we care about it.</p> <h2 class="relative group"><a id="what-is-nlp" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-is-nlp"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What is NLP?</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/iNzlxWUAjd4" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-14hz609">NLP is a field of linguistics and machine learning focused on understanding everything related to human language. The aim of NLP tasks is not only to understand single words individually, but to be able to understand the context of those words.</p> <p data-svelte-h="svelte-rkg1z0">The following is a list of common NLP tasks, with some examples of each:</p> <ul data-svelte-h="svelte-19s7o39"><li><strong>Classifying whole sentences</strong>: Getting the sentiment of a review, detecting if an email is spam, determining if a sentence is grammatically correct or whether two sentences are logically related or not</li> <li><strong>Classifying each word in a sentence</strong>: Identifying the grammatical components of a sentence (noun, verb, adjective), or the named entities (person, location, organization)</li> <li><strong>Generating text content</strong>: Completing a prompt with auto-generated text, filling in the blanks in a text with masked words</li> <li><strong>Extracting an answer from a text</strong>: Given a question and a context, extracting the answer to the question based on the information provided in the context</li> <li><strong>Generating a new sentence from an input text</strong>: Translating a text into another language, summarizing a text</li></ul> <p data-svelte-h="svelte-kv2x67">NLP isn’t limited to written text though. It also tackles complex challenges in speech recognition and computer vision, such as generating a transcript of an audio sample or a description of an image.</p> <h2 class="relative group"><a id="rise-of-llms" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rise-of-llms"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>The Rise of Large Language Models (LLMs)</span></h2> <p data-svelte-h="svelte-a773vn">In recent years, the field of NLP has been revolutionized by Large Language Models (LLMs). These models, which include architectures like GPT (Generative Pre-trained Transformer) and <a href="https://huggingface.co/meta-llama" rel="nofollow">Llama</a>, have transformed what’s possible in language processing.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-fqnqk8">A large language model (LLM) is an AI model trained on massive amounts of text data that can understand and generate human-like text, recognize patterns in language, and perform a wide variety of language tasks without task-specific training. They represent a significant advancement in the field of natural language processing (NLP).</p></div> <p data-svelte-h="svelte-9xv972">LLMs are characterized by:</p> <ul data-svelte-h="svelte-141j3vz"><li><strong>Scale</strong>: They contain millions, billions, or even hundreds of billions of parameters</li> <li><strong>General capabilities</strong>: They can perform multiple tasks without task-specific training</li> <li><strong>In-context learning</strong>: They can learn from examples provided in the prompt</li> <li><strong>Emergent abilities</strong>: As these models grow in size, they demonstrate capabilities that weren’t explicitly programmed or anticipated</li></ul> <p data-svelte-h="svelte-166lsa3">The advent of LLMs has shifted the paradigm from building specialized models for specific NLP tasks to using a single, large model that can be prompted or fine-tuned to address a wide range of language tasks. This has made sophisticated language processing more accessible while also introducing new challenges in areas like efficiency, ethics, and deployment.</p> <p data-svelte-h="svelte-afnoj0">However, LLMs also have important limitations:</p> <ul data-svelte-h="svelte-1mny30g"><li><strong>Hallucinations</strong>: They can generate incorrect information confidently</li> <li><strong>Lack of true understanding</strong>: They lack true understanding of the world and operate purely on statistical patterns</li> <li><strong>Bias</strong>: They may reproduce biases present in their training data or inputs.</li> <li><strong>Context windows</strong>: They have limited context windows (though this is improving)</li> <li><strong>Computational resources</strong>: They require significant computational resources</li></ul> <h2 class="relative group"><a id="why-is-it-challenging" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-is-it-challenging"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why is language processing challenging?</span></h2> <p data-svelte-h="svelte-1g6qo1w">Computers don’t process information in the same way as humans. For example, when we read the sentence “I am hungry,” we can easily understand its meaning. Similarly, given two sentences such as “I am hungry” and “I am sad,” we’re able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.</p> <p data-svelte-h="svelte-7biue8">Even with the advances in LLMs, many fundamental challenges remain. These include understanding ambiguity, cultural context, sarcasm, and humor. LLMs address these challenges through massive training on diverse datasets, but still often fall short of human-level understanding in many complex scenarios.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/en/chapter1/2.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_36rhiy = {
	assets: "/docs/course/pr_1007/en",
	base: "/docs/course/pr_1007/en",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/course/pr_1007/en/_app/immutable/entry/start.3e9ba80c.js"),
	import("/docs/course/pr_1007/en/_app/immutable/entry/app.02fda992.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 6],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 15.7 kB
Xet hash:: cc5c561a4c2d8b354208ed7b911f7faffe0c90e05a539cc438c7f4af82d4abe5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.