---
datasets:
- EleutherAI/pile
language:
- en
---

# DenseRetNet-350M

Unofficial pretraining checkpoint for DenseRetNet-350M from the paper DenseMamba (https://arxiv.org/abs/2403.00818). The training data is 15B tokens randomly sampled from The Pile dataset.

- Recurrent generation example:

```python
import torch
import transformers

model_name_or_path = '/path to model'
MAX_NEW_TOKENS = 256
inference_dtype = torch.float16

generation_config = transformers.GenerationConfig(
    do_sample=False,                  # greedy decoding
    max_new_tokens=MAX_NEW_TOKENS,
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name_or_path, use_fast=False, trust_remote_code=True)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name_or_path, torch_dtype=inference_dtype, trust_remote_code=True)
model.cuda()
model.eval()

input_sents = 'I have a dream'
inputs = tokenizer(input_sents, return_tensors="pt", truncation=True, max_length=2048)
output = model.generate(
    input_ids=inputs["input_ids"].cuda(),
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
)
# With return_dict_in_generate=True, generate returns a ModelOutput;
# the generated token ids live in output.sequences.
output_text = tokenizer.decode(output.sequences[0].tolist(), skip_special_tokens=True)
print(output_text)
```
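
For multiple prompts, generation can be batched with left padding so that each prompt's final token sits directly before its generated continuation. This is a minimal sketch, not part of the original card: it assumes the checkpoint's custom modeling code accepts an `attention_mask` the way standard Hugging Face causal LMs do, and the fallback of reusing `eos_token` as the pad token is likewise an assumption. The `prompts` are hypothetical.

```python
# Batched generation sketch (reuses model, tokenizer, generation_config from above).
# Left padding keeps each prompt's last real token adjacent to the generated text.
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    # Assumption: the checkpoint defines no pad token, so reuse eos.
    tokenizer.pad_token = tokenizer.eos_token

prompts = ["I have a dream", "The quick brown fox"]  # hypothetical prompts
batch = tokenizer(prompts, return_tensors="pt", padding=True,
                  truncation=True, max_length=2048)

with torch.no_grad():
    out = model.generate(
        input_ids=batch["input_ids"].cuda(),
        attention_mask=batch["attention_mask"].cuda(),  # assumes the custom code honors the mask
        generation_config=generation_config,
        return_dict_in_generate=True,
    )

for seq in out.sequences:
    print(tokenizer.decode(seq.tolist(), skip_special_tokens=True))
```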