| import torch |
| import transformers |
| from typing import Optional, Union |
| from lm_eval.base import BaseLM |
|
|
|
|
class HFLM(BaseLM):
    """Causal language model loaded from the Hugging Face ``transformers`` hub.

    Wraps an ``AutoModelForCausalLM`` and its tokenizer behind the
    ``BaseLM`` evaluation interface: token encode/decode, batched logit
    scoring via ``_model_call``, and greedy generation via
    ``_model_generate``.
    """

    def __init__(
        self,
        device="cuda",
        pretrained="gpt2",
        revision="main",
        low_cpu_mem_usage=None,
        torch_dtype=None,
        device_map=None,
        subfolder=None,
        tokenizer=None,
        batch_size=1,
        load_in_8bit: Optional[bool] = False,
        trust_remote_code: Optional[bool] = False,
        use_fast: Optional[bool] = True,
    ):
        """Load the model and tokenizer.

        Args:
            device: ``"cuda"``, ``"cpu"``, or a CUDA device index given as a
                string (e.g. ``"0"``); an empty/falsy value autodetects CUDA.
            pretrained: model name or path passed to ``from_pretrained``.
            revision: hub revision (branch, tag, or commit hash).
            low_cpu_mem_usage: forwarded to ``from_pretrained``.
            torch_dtype: forwarded to ``from_pretrained``.
            device_map: forwarded to ``from_pretrained`` (e.g. ``"auto"``).
            subfolder: optional subfolder within the repo; appended to
                ``revision`` as ``"revision/subfolder"``.
            tokenizer: optional tokenizer name/path; defaults to ``pretrained``.
            batch_size: evaluation batch size per GPU.
            load_in_8bit: load the model 8-bit quantized via ``bitsandbytes``.
            trust_remote_code: allow custom modeling code from the hub.
            use_fast: prefer the fast (Rust) tokenizer implementation.
        """
        super().__init__()

        assert isinstance(device, str)
        assert isinstance(pretrained, str)
        assert isinstance(batch_size, int)

        if device:
            # Anything other than the literal "cuda"/"cpu" is treated as a
            # CUDA device index, e.g. device="0" -> torch.device(0).
            if device not in ["cuda", "cpu"]:
                device = int(device)
            self._device = torch.device(device)
            print(f"Using device '{device}'")
        else:
            print("Device not specified")
            print(f"Cuda Available? {torch.cuda.is_available()}")
            self._device = (
                torch.device("cuda")
                if torch.cuda.is_available()
                else torch.device("cpu")
            )

        # from_pretrained accepts a combined "revision/subfolder" spec.
        revision = revision + ("/" + subfolder if subfolder is not None else "")

        self.gpt2 = transformers.AutoModelForCausalLM.from_pretrained(
            pretrained,
            load_in_8bit=load_in_8bit,
            low_cpu_mem_usage=low_cpu_mem_usage,
            torch_dtype=torch_dtype,
            device_map=device_map,
            revision=revision,
            trust_remote_code=trust_remote_code,
        ).eval()
        if not load_in_8bit:
            try:
                self.gpt2.to(self.device)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt /
                # SystemExit still propagate. `.to()` may legitimately fail
                # for bitsandbytes-quantized or device_map-sharded models,
                # which is a best-effort situation, not a fatal error.
                print(
                    "Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore."
                )
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            pretrained if tokenizer is None else tokenizer,
            revision=revision,
            trust_remote_code=trust_remote_code,
            use_fast=use_fast,
        )
        self.vocab_size = self.tokenizer.vocab_size

        # Per-GPU batch size consumed by BaseLM's batching logic.
        self.batch_size_per_gpu = batch_size

    @property
    def eot_token_id(self):
        # End-of-text token id; used as the default stop token.
        return self.tokenizer.eos_token_id

    @property
    def max_length(self):
        """Maximum context length supported by the loaded model."""
        try:
            # GPT-2-style configs expose the context window as `n_ctx`.
            return self.gpt2.config.n_ctx
        except AttributeError:
            # Most other architectures expose it here instead.
            return self.gpt2.config.max_position_embeddings

    @property
    def max_gen_toks(self):
        # Maximum number of new tokens to generate per request.
        return 256

    @property
    def batch_size(self):
        return self.batch_size_per_gpu

    @property
    def device(self):
        return self._device

    def tok_encode(self, string: str):
        # No special tokens: the evaluation harness controls BOS/EOS itself.
        return self.tokenizer.encode(string, add_special_tokens=False)

    def tok_decode(self, tokens):
        return self.tokenizer.decode(tokens)

    def _model_call(self, inps):
        """
        inps: a torch tensor of shape [batch, sequence]
        the size of sequence may vary from call to call

        returns: a torch tensor of shape [batch, sequence, vocab] with the
        logits returned from the model
        """
        with torch.no_grad():
            return self.gpt2(inps)[0]

    def _model_generate(self, context, max_length, eos_token_id):
        # Greedy decoding; eos doubles as the pad token so generation can
        # stop early without tokenizer-specific pad configuration.
        return self.gpt2.generate(
            context,
            max_length=max_length,
            eos_token_id=eos_token_id,
            pad_token_id=eos_token_id,
            do_sample=False,
        )
|
|
|
|
| |
# Backward-compatibility alias: older callers refer to this wrapper by its
# original GPT-2-specific name even though it now loads any causal LM.
GPT2LM = HFLM
|
|