| <!--Copyright 2023 The HuggingFace Team. All rights reserved. | |
| Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with | |
| the License. You may obtain a copy of the License at | |
| http://www.apache.org/licenses/LICENSE-2.0 | |
| Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on | |
| an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the | |
| specific language governing permissions and limitations under the License. | |
| ⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be | |
| rendered properly in your Markdown viewer. | |
| --> | |
| *์ด ๋ชจ๋ธ์ 2023๋ 8์ 24์ผ์ ๊ณต๊ฐ๋์์ผ๋ฉฐ, 2023๋ 8์ 25์ผ์ Hugging Face Transformers์ ์ถ๊ฐ๋์์ต๋๋ค.* | |
| <div style="float: right;"> | |
| <div class="flex flex-wrap space-x-1"> | |
| <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white"> | |
| </div> | |
| </div> | |
| # CodeLlama[[codellama]] | |
| [Code Llama](https://huggingface.co/papers/2308.12950)๋ ์ฝ๋ฉ ์์ ์ ํนํ๋ ๋๊ท๋ชจ ์ธ์ด ๋ชจ๋ธ ๊ณ์ด๋ก, [Llama 2](./llama2)๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ๊ฐ๋ฐ๋์์ต๋๋ค. ์ผ๋ฐ์ ์ธ ์ฝ๋, Python ํนํ, ๋ช ๋ น์ด(์ง์) ๊ธฐ๋ฐ ๋ณํ ๋ฑ ๋ค์ํ ๋ฒ์ ์ผ๋ก ์ ๊ณต๋๋ฉฐ, ๋ชจ๋ 7B, 13B, 34B, 70B ๋งค๊ฐ๋ณ์ ํฌ๊ธฐ๋ก ์ฌ์ฉํ ์ ์์ต๋๋ค. Code Llama ๋ชจ๋ธ์ ์ฝ๋๋ฅผ ์์ฑํ๊ณ ์ค๋ช ํ๋ฉฐ, ์ฝ๋์ ๋๋ฝ๋ ๋ถ๋ถ์ ์ฑ์ธ ์๋ ์์ต๋๋ค. ์ด๋ฅผ ์ธํ๋ง(infilling)์ด๋ผ๊ณ ํฉ๋๋ค. 16K ํ ํฐ ๊ธธ์ด๋ก ํ๋ จ๋์์ง๋ง, ์ต๋ 100K ํ ํฐ๊น์ง ์์ ์ ์ผ๋ก ์์ฑํ๋ฉฐ ๊ธด ์ปจํ ์คํธ๋ ์ฒ๋ฆฌํ ์ ์์ต๋๋ค. | |
| [Code Llama](https://huggingface.co/collections/meta-llama/code-llama-family-661da32d0a9d678b6f55b933) ์ปฌ๋ ์ ์์ ๋ชจ๋ ์๋ณธ Code Llama ์ฒดํฌํฌ์ธํธ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. | |
| > [!TIP] | |
| > ๋ค์ํ ์ฝ๋ฉ ์์ ์ Code Llama๋ฅผ ์ ์ฉํ๋ ๋ ๋ง์ ์์๋ฅผ ๋ณด๋ ค๋ฉด ์ค๋ฅธ์ชฝ ์ฌ์ด๋๋ฐ์ Code Llama ๋ชจ๋ธ์ ํด๋ฆญํ์ธ์. | |
| ์๋ ์์๋ [`Pipeline`], [`AutoModel`], ๊ทธ๋ฆฌ๊ณ ๋ช ๋ น์ค์์ ์ฝ๋๋ฅผ ์์ฑํ๋ ๋ฐฉ๋ฒ์ ๋ณด์ฌ์ค๋๋ค. | |
| <hfoptions id="usage"> | |
| <hfoption id="Pipeline"> | |
| ```py | |
| import torch | |
| from transformers import pipeline | |
| pipe = pipeline( | |
| "text-generation", | |
| model="meta-llama/CodeLlama-7b-hf", | |
| torch_dtype=torch.float16, | |
| device_map=0 | |
| ) | |
| # ๊ธฐ๋ณธ ์ฝ๋ ์์ฑ | |
| result = pipe("# Function to calculate the factorial of a number\ndef factorial(n):", max_new_tokens=256) | |
| print(result[0]['generated_text']) | |
| # ์ธํ๋ง | |
| infill_result = pipe("def remove_non_ascii(s: str) -> str:\n \"\"\" <FILL_ME>\n return result", max_new_tokens=200) | |
| print(infill_result[0]['generated_text']) | |
| ``` | |
| </hfoption> | |
| <hfoption id="AutoModel"> | |
| ```py | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7b-hf") | |
| model = AutoModelForCausalLM.from_pretrained( | |
| "meta-llama/CodeLlama-7b-hf", | |
| torch_dtype=torch.float16, | |
| device_map="auto", | |
| attn_implementation="sdpa" | |
| ) | |
| # ๊ธฐ๋ณธ ์ฝ๋ ์์ฑ | |
| prompt = "# Function to calculate the factorial of a number\ndef factorial(n):" | |
| input_ids = tokenizer(prompt, return_tensors="pt").to(model.device) | |
| output = model.generate( | |
| **input_ids, | |
| max_new_tokens=256, | |
| cache_implementation="static" | |
| ) | |
| print(tokenizer.decode(output[0], skip_special_tokens=True)) | |
| # ์ธํ๋ง | |
| infill_prompt = "def remove_non_ascii(s: str) -> str:\n \"\"\" <FILL_ME>\n return result" | |
| input_ids = tokenizer(infill_prompt, return_tensors="pt").to(model.device) | |
| filled_output = model.generate(**input_ids, max_new_tokens=200) | |
| filled_text = tokenizer.decode(filled_output[0], skip_special_tokens=True) | |
| print(filled_text) | |
| ``` | |
| </hfoption> | |
| <hfoption id="transformers CLI"> | |
| ```bash | |
| echo -e "# Function to calculate the factorial of a number\ndef factorial(n):" | transformers run --task text-generation --model meta-llama/CodeLlama-7b-hf --device 0 | |
| ``` | |
| </hfoption> | |
| </hfoptions> | |
| ์์ํ๋ ๊ฐ์ค์น๋ฅผ ๋ ๋ฎ์ ์ ๋ฐ๋๋ก ํํํ์ฌ ๋๊ท๋ชจ ๋ชจ๋ธ์ ๋ฉ๋ชจ๋ฆฌ ๋ถ๋ด์ ์ค์ ๋๋ค. ๋ ๋ง์ ์ฌ์ฉ ๊ฐ๋ฅํ ์์ํ ๋ฐฑ์๋๋ [์์ํ](../quantization/overview) ๊ฐ์๋ฅผ ์ฐธ์กฐํ์ธ์. | |
| ์๋ ์์๋ [bitsandbytes](../quantization/bitsandbytes)๋ฅผ ์ฌ์ฉํ์ฌ ๊ฐ์ค์น๋ฅผ 4๋นํธ๋ก๋ง ์์ํํฉ๋๋ค. | |
| ```py | |
| # bitsandbytes๋ฅผ ์ค์นํฉ๋๋ค. | |
| import torch | |
| from transformers import AutoModelForCausalLM, CodeLlamaTokenizer, BitsAndBytesConfig | |
| bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True) | |
| tokenizer = CodeLlamaTokenizer.from_pretrained("meta-llama/CodeLlama-34b-hf") | |
| model = AutoModelForCausalLM.from_pretrained( | |
| "meta-llama/CodeLlama-34b-hf", | |
| torch_dtype=torch.bfloat16, | |
| device_map="auto", | |
| quantization_config=bnb_config | |
| ) | |
| prompt = "# Write a Python function to check if a string is a palindrome\ndef is_palindrome(s):" | |
| input_ids = tokenizer(prompt, return_tensors="pt").to(model.device) | |
| output = model.generate(**input_ids, max_new_tokens=200, cache_implementation="static") | |
| print(tokenizer.decode(output[0], skip_special_tokens=True)) | |
| ``` | |
| [AttentionMaskVisualizer](https://github.com/huggingface/transformers/blob/beb9b5b02246b9b7ee81ddf938f93f44cfeaad19/src/transformers/utils/attention_visualizer.py#L139)๋ฅผ ์ฌ์ฉํ๋ฉด ๋ชจ๋ธ์ด ์ด๋ค ํ ํฐ์ ์ฃผ์๋ฅผ ๊ธฐ์ธ์ผ ์ ์๊ณ ๊ธฐ์ธ์ผ ์ ์๋์ง๋ฅผ ๋ ์ ์ดํดํ ์ ์์ต๋๋ค. | |
| ```py | |
| from transformers.utils.attention_visualizer import AttentionMaskVisualizer | |
| visualizer = AttentionMaskVisualizer("meta-llama/CodeLlama-7b-hf") | |
| visualizer("""def func(a, b): | |
| return a + b""") | |
| ``` | |
| <div class="flex justify-center"> | |
| <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/codellama-attn-mask.png"/> | |
| </div> | |
| ## ์ฐธ๊ณ ์ฌํญ[[notes]] | |
| - ์ธํ๋ง ๊ธฐ๋ฅ์ 7B ๋ฐ 13B ๊ธฐ๋ฐ ๋ชจ๋ธ์์๋ง ์ฌ์ฉํ ์ ์์ผ๋ฉฐ, Python, Instruct, 34B ๋๋ 70B ๋ชจ๋ธ์์๋ ์ฌ์ฉํ ์ ์์ต๋๋ค. | |
| - ์ฝ๋๋ฅผ ์ฑ์ ๋ฃ๊ณ ์ถ์ ๋ถ๋ถ์ `<FILL_ME>` ํ ํฐ์ ์ฌ์ฉํ์ธ์. ํ ํฌ๋์ด์ ๋ ์ด ํ ํฐ์ ๋ถํ ํ์ฌ [์๋ณธ ํ๋ จ ํจํด](https://github.com/facebookresearch/codellama/blob/cb51c14ec761370ba2e2bc351374a79265d0465e/llama/generation.py#L402) ์ ๋ฐ๋ฅด๋ ์ ๋ ฅ ๋ฌธ์์ด๋ก ๋ณํํฉ๋๋ค. ์ด๋ ์ง์ ํจํด์ ์ค๋นํ๋ ๊ฒ๋ณด๋ค ๋ ์์ ์ ์ ๋๋ค. | |
| ```py | |
| from transformers import LlamaForCausalLM, CodeLlamaTokenizer | |
| tokenizer = CodeLlamaTokenizer.from_pretrained("meta-llama/CodeLlama-7b-hf") | |
| model = LlamaForCausalLM.from_pretrained("meta-llama/CodeLlama-7b-hf") | |
| PROMPT = '''def remove_non_ascii(s: str) -> str: | |
| """ <FILL_ME> | |
| return result | |
| ''' | |
| input_ids = tokenizer(PROMPT, return_tensors="pt")["input_ids"] | |
| generated_ids = model.generate(input_ids, max_new_tokens=128) | |
| filling = tokenizer.batch_decode(generated_ids[:, input_ids.shape[1]:], skip_special_tokens=True)[0] | |
| print(PROMPT.replace("<FILL_ME>", filling)) | |
| ``` | |
| - ์ถ๊ฐ ํ๋ จ์ด๋ ๋ฏธ์ธ ์กฐ์ ์๋ `bfloat16`์ ์ฌ์ฉํ๊ณ ์ถ๋ก ์๋ `float16`์ ์ฌ์ฉํ์ธ์. | |
| - `BOS` ๋ฌธ์๋ ์ ๋์ฌ๋ ์ ๋ฏธ์ฌ๋ฅผ ์ธ์ฝ๋ฉํ ๋ ์ธํ๋ง ์์ ์ ์ฌ์ฉ๋์ง ์์ผ๋ฉฐ, ๊ฐ ํ๋กฌํํธ์ ๋งจ ์์์๋ง ์ฌ์ฉ๋ฉ๋๋ค. | |
| - ํ ํฌ๋์ด์ ๋ [SentencePiece](https://github.com/google/sentencepiece)๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ํ๋ byte-pair ์ธ์ฝ๋ฉ ๋ชจ๋ธ์ ๋๋ค. ๋์ฝ๋ฉ ๊ณผ์ ์์ ์ฒซ ๋ฒ์งธ ํ ํฐ์ด ๋จ์ด์ ์์์ธ ๊ฒฝ์ฐ(์๋ฅผ ๋ค์ด "Banana"), ํ ํฌ๋์ด์ ๋ ๋ฌธ์์ด์ ์ ๋์ฌ ๊ณต๋ฐฑ์ ์ถ๊ฐํ์ง ์์ต๋๋ค. | |
| ## CodeLlamaTokenizer | |
| [[autodoc]] CodeLlamaTokenizer | |
| - get_special_tokens_mask | |
| - save_vocabulary | |
| ## CodeLlamaTokenizerFast | |
| [[autodoc]] CodeLlamaTokenizerFast | |
| - get_special_tokens_mask | |
| - update_post_processor | |
| - save_vocabulary | |