To load this model, use the following code:
```py
from transformers import PreTrainedTokenizerFast, AutoModelForCausalLM, AutoConfig

# Number of free-group generators. The original snippet used this name without
# defining it (NameError when copy-pasted); 3 matches the rank used by the
# generation example below (`free_group_bounded(3, ...)`) — confirm against the
# model card if your checkpoint differs.
freegroup_dimension = 3

tokenizer = PreTrainedTokenizerFast.from_pretrained('kibrq/greedy-intersection')
# trust_remote_code is required: the config/model classes live in the repo itself.
config = AutoConfig.from_pretrained('kibrq/greedy-intersection', trust_remote_code=True)
config._from_tokenizer(freegroup_dimension, tokenizer)
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
```
To generate words from the intersection, use this code:
```py
from itertools import islice

from freegroup.sampling import free_group_bounded
from freegroup.tools import is_from_singleton_normal_closure
from freegroup.commutators import to_tokenizer, from_tokenizer

batch_size = 20
prefix_length = 15
generation_config = dict(
    max_new_tokens=200,
)
num_runs = 10

for _ in range(num_runs):
    # Sample a batch of constant-length prefixes from the rank-3 free group.
    prefixes = islice(
        free_group_bounded(3, max_length=prefix_length, random_length_method="constant"),
        batch_size,
    )
    # Bug fix: the original passed the builtin `input` function here (and on the
    # next two uses) instead of the sampled batch, which raises a TypeError.
    prefixes = list(map(to_tokenizer, prefixes))
    input_ids = tokenizer(prefixes, return_tensors='pt').input_ids
    outputs = model.generate(
        inputs=input_ids,
        **generation_config,
    )
    outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    outputs = map(from_tokenizer, outputs)
    # Keep only words lying in every listed normal closure, i.e. in the intersection.
    condition = lambda word: all(
        is_from_singleton_normal_closure(gen, word)
        for gen in [[1], [2], [3], [1, 2, 3]]
    )
    outputs = filter(condition, outputs)
    print(list(outputs))
```