| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import inspect |
| import os |
| from argparse import ArgumentParser, Namespace |
| from importlib import import_module |
|
|
| import huggingface_hub |
| import numpy as np |
| from packaging import version |
|
|
| from .. import ( |
| FEATURE_EXTRACTOR_MAPPING, |
| IMAGE_PROCESSOR_MAPPING, |
| PROCESSOR_MAPPING, |
| TOKENIZER_MAPPING, |
| AutoConfig, |
| AutoFeatureExtractor, |
| AutoImageProcessor, |
| AutoProcessor, |
| AutoTokenizer, |
| is_datasets_available, |
| is_tf_available, |
| is_torch_available, |
| ) |
| from ..utils import TF2_WEIGHTS_INDEX_NAME, TF2_WEIGHTS_NAME, logging |
| from . import BaseTransformersCLICommand |
|
|
|
|
# TensorFlow, PyTorch and `datasets` are optional dependencies: each is imported only when its availability
# helper returns a truthy value. The order of these guarded imports is kept as-is on purpose.
if is_tf_available():
    import tensorflow as tf

    # Switch TensorFloat-32 execution off.
    # NOTE(review): presumably so that reduced-precision math does not inflate the PyTorch-vs-TensorFlow
    # differences that are later compared against the error tolerance -- confirm.
    tf.config.experimental.enable_tensor_float_32_execution(False)

if is_torch_available():
    import torch

if is_datasets_available():
    from datasets import load_dataset


# Default value of `--max-error`: the largest absolute element-wise difference tolerated between the
# PyTorch outputs and the TensorFlow outputs before the command aborts.
MAX_ERROR = 5e-5
|
|
|
|
def convert_command_factory(args: Namespace):
    """
    Build the `pt-to-tf` command object from the parsed command-line arguments.

    Args:
        args: Parsed arguments. Must carry the attributes `model_name`, `local_dir`, `max_error`,
            `new_weights`, `no_pr`, `push`, `extra_commit_description` and `override_model_class`.

    Returns: PTtoTFCommand
    """
    return PTtoTFCommand(
        model_name=args.model_name,
        local_dir=args.local_dir,
        max_error=args.max_error,
        new_weights=args.new_weights,
        no_pr=args.no_pr,
        push=args.push,
        extra_commit_description=args.extra_commit_description,
        override_model_class=args.override_model_class,
    )
|
|
|
|
class PTtoTFCommand(BaseTransformersCLICommand):
    """
    `pt-to-tf` CLI command.

    Loads a PyTorch checkpoint, cross-loads it into the matching TensorFlow class, checks that both frameworks
    produce outputs within `max_error` of each other, saves the TensorFlow weights and, depending on the flags,
    pushes them or opens a PR with them.
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """
        Register this command to argparse so it's available for the transformers-cli

        Args:
            parser: Root parser to register command-specific arguments
        """
        pt_to_tf_parser = parser.add_parser(
            "pt-to-tf",
            help=(
                "CLI tool to run convert a transformers model from a PyTorch checkpoint to a TensorFlow checkpoint."
                " Can also be used to validate existing weights without opening PRs, with --no-pr."
            ),
        )
        pt_to_tf_parser.add_argument(
            "--model-name",
            type=str,
            required=True,
            help="The model name, including owner/organization, as seen on the hub.",
        )
        pt_to_tf_parser.add_argument(
            "--local-dir",
            type=str,
            default="",
            help="Optional local directory of the model repository. Defaults to /tmp/{model_name}",
        )
        pt_to_tf_parser.add_argument(
            "--max-error",
            type=float,
            default=MAX_ERROR,
            help=(
                f"Maximum error tolerance. Defaults to {MAX_ERROR}. This flag should be avoided, use at your own risk."
            ),
        )
        pt_to_tf_parser.add_argument(
            "--new-weights",
            action="store_true",
            help="Optional flag to create new TensorFlow weights, even if they already exist.",
        )
        pt_to_tf_parser.add_argument(
            "--no-pr", action="store_true", help="Optional flag to NOT open a PR with converted weights."
        )
        pt_to_tf_parser.add_argument(
            "--push",
            action="store_true",
            help="Optional flag to push the weights directly to `main` (requires permissions)",
        )
        pt_to_tf_parser.add_argument(
            "--extra-commit-description",
            type=str,
            default="",
            help="Optional additional commit description to use when opening a PR (e.g. to tag the owner).",
        )
        pt_to_tf_parser.add_argument(
            "--override-model-class",
            type=str,
            default=None,
            help="If you think you know better than the auto-detector, you can specify the model class here. "
            "Can be either an AutoModel class or a specific model class like BertForSequenceClassification.",
        )
        pt_to_tf_parser.set_defaults(func=convert_command_factory)

    @staticmethod
    def find_pt_tf_differences(pt_outputs, tf_outputs):
        """
        Compares the TensorFlow and PyTorch outputs, returning a dictionary with all tensor differences.

        Args:
            pt_outputs: PyTorch model outputs. Must expose `keys()` and be indexable by those keys.
            tf_outputs: TensorFlow model outputs, expected to have the same keys and nesting as `pt_outputs`.

        Returns:
            A flat dict mapping each tensor's path (e.g. `"logits"` or `"hidden_states[0]"`) to the maximum
            absolute element-wise difference between the PyTorch and the TensorFlow tensor.

        Raises:
            ValueError: If the two outputs do not expose the same set of keys.
        """
        # 1. All output attributes must be the same
        pt_out_attrs = set(pt_outputs.keys())
        tf_out_attrs = set(tf_outputs.keys())
        if pt_out_attrs != tf_out_attrs:
            raise ValueError(
                f"The model outputs have different attributes, aborting. (Pytorch: {pt_out_attrs}, TensorFlow:"
                f" {tf_out_attrs})"
            )

        # 2. For each output attribute, compute the difference
        def _find_pt_tf_differences(pt_out, tf_out, differences, attr_name=""):
            # Leaf: a tensor. Both sides are converted with `.numpy()` and compared element-wise.
            if isinstance(pt_out, torch.Tensor):
                differences[attr_name] = np.max(np.abs(pt_out.numpy() - tf_out.numpy()))
                return differences

            # Container: recurse into every entry, extending the path used as the key in `differences`.
            for i, pt_item in enumerate(pt_out):
                if isinstance(pt_item, str):
                    # Iterating a mapping yields its keys, so look the values up by key on both sides.
                    branch_name = attr_name + pt_item
                    tf_item = tf_out[pt_item]
                    pt_item = pt_out[pt_item]
                else:
                    # Sequence entry: address it by position.
                    branch_name = attr_name + f"[{i}]"
                    tf_item = tf_out[i]
                differences = _find_pt_tf_differences(pt_item, tf_item, differences, branch_name)
            return differences

        return _find_pt_tf_differences(pt_outputs, tf_outputs, {})

    def __init__(
        self,
        model_name: str,
        local_dir: str,
        max_error: float,
        new_weights: bool,
        no_pr: bool,
        push: bool,
        extra_commit_description: str,
        override_model_class: str,
        *args,
    ):
        """
        Store the command options.

        Args:
            model_name: Model name on the hub, including owner/organization.
            local_dir: Local directory of the model repository. Falls back to `/tmp/{model_name}` when empty.
            max_error: Maximum tolerated absolute difference between PyTorch and TensorFlow outputs.
            new_weights: Whether to (re)create the TensorFlow weights even if they already exist.
            no_pr: Whether to skip opening a PR with the converted weights.
            push: Whether to push the weights directly instead of opening a PR.
            extra_commit_description: Extra text appended to the PR description.
            override_model_class: Optional model class name that replaces the architecture found in the config.
            *args: Accepted and ignored.
        """
        self._logger = logging.get_logger("transformers-cli/pt_to_tf")
        self._model_name = model_name
        self._local_dir = local_dir if local_dir else os.path.join("/tmp", model_name)
        self._max_error = max_error
        self._new_weights = new_weights
        self._no_pr = no_pr
        self._push = push
        self._extra_commit_description = extra_commit_description
        self._override_model_class = override_model_class

    def get_inputs(self, pt_model, tf_dummy_inputs, config):
        """
        Returns the right inputs for the model, based on its signature.

        Args:
            pt_model: The loaded PyTorch model; its config type and `forward` signature drive the choices below.
            tf_dummy_inputs: Dummy inputs of the TensorFlow model, only checked for a `decoder_input_ids` entry.
            config: The model config, only checked for `is_encoder_decoder`.

        Returns:
            A `(pt_input, tf_input)` pair holding the same data as PyTorch and as TensorFlow tensors.

        Raises:
            ValueError: If the config class is not found in any of the processor/tokenizer mappings.
        """

        def _get_audio_input():
            # Two raw waveforms from a small dummy speech dataset, sorted by id so the pick is deterministic.
            ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
            speech_samples = ds.sort("id").select(range(2))[:2]["audio"]
            return [x["array"] for x in speech_samples]

        # Pick the preprocessing class from the config type. Tokenizers without a pad token reuse the EOS token,
        # since the text inputs below are padded.
        model_config_class = type(pt_model.config)
        if model_config_class in PROCESSOR_MAPPING:
            processor = AutoProcessor.from_pretrained(self._local_dir)
            if model_config_class in TOKENIZER_MAPPING and processor.tokenizer.pad_token is None:
                processor.tokenizer.pad_token = processor.tokenizer.eos_token
        elif model_config_class in IMAGE_PROCESSOR_MAPPING:
            processor = AutoImageProcessor.from_pretrained(self._local_dir)
        elif model_config_class in FEATURE_EXTRACTOR_MAPPING:
            processor = AutoFeatureExtractor.from_pretrained(self._local_dir)
        elif model_config_class in TOKENIZER_MAPPING:
            processor = AutoTokenizer.from_pretrained(self._local_dir)
            if processor.pad_token is None:
                processor.pad_token = processor.eos_token
        else:
            raise ValueError(f"Unknown data processing type (model config type: {model_config_class})")

        # Assemble the processor keyword arguments from the parameter names accepted by `forward`.
        model_forward_signature = set(inspect.signature(pt_model.forward).parameters.keys())
        processor_inputs = {}
        if "input_ids" in model_forward_signature:
            processor_inputs.update(
                {
                    "text": ["Hi there!", "I am a batch with more than one row and different input lengths."],
                    "padding": True,
                    "truncation": True,
                }
            )
        if "pixel_values" in model_forward_signature:
            sample_images = load_dataset("cifar10", "plain_text", split="test")[:2]["img"]
            processor_inputs.update({"images": sample_images})
        if "input_features" in model_forward_signature:
            feature_extractor_signature = inspect.signature(processor.feature_extractor).parameters
            # Keep the feature extractor's own default padding strategy when it declares one that is neither
            # `False` nor `None`; in every other case pad with `True`.
            padding_strategy = True
            if "padding" in feature_extractor_signature:
                default_strategy = feature_extractor_signature["padding"].default
                if default_strategy is not False and default_strategy is not None:
                    padding_strategy = default_strategy
            processor_inputs.update({"audio": _get_audio_input(), "padding": padding_strategy})
        if "input_values" in model_forward_signature:
            processor_inputs.update({"audio": _get_audio_input(), "padding": True})
        pt_input = processor(**processor_inputs, return_tensors="pt")
        tf_input = processor(**processor_inputs, return_tensors="tf")

        # Models with a decoder additionally get a (2, 1) batch of decoder input ids, filled with the
        # decoder start token id (0 when the config does not set one).
        if (
            config.is_encoder_decoder
            or (hasattr(pt_model, "encoder") and hasattr(pt_model, "decoder"))
            or "decoder_input_ids" in tf_dummy_inputs
        ):
            decoder_input_ids = np.asarray([[1], [1]], dtype=int) * (pt_model.config.decoder_start_token_id or 0)
            pt_input.update({"decoder_input_ids": torch.tensor(decoder_input_ids)})
            tf_input.update({"decoder_input_ids": tf.convert_to_tensor(decoder_input_ids)})

        return pt_input, tf_input

    def _validate_differences(self, differences, architectures, model_description):
        """
        Split `differences` into head outputs and hidden layers and check both against `self._max_error`.

        Args:
            differences: Flat dict as returned by `find_pt_tf_differences`.
            architectures: The architectures in use, or `None` when the base auto classes were used.
            model_description: Adjective used in the error message (`"cross-loaded"` or `"converted"`).

        Returns:
            A `(max_output_diff, max_hidden_diff)` tuple. A group with no entries contributes `0.0`.

        Raises:
            ValueError: If an architecture is set but only hidden outputs were found, or if any difference
                exceeds `self._max_error`.
        """
        output_differences = {k: v for k, v in differences.items() if "hidden" not in k}
        hidden_differences = {k: v for k, v in differences.items() if "hidden" in k}
        if len(output_differences) == 0 and architectures is not None:
            raise ValueError(
                f"Something went wrong -- the config file has architectures ({architectures}), but no model head"
                " output was found. All outputs start with 'hidden'"
            )
        max_output_diff = max(output_differences.values()) if output_differences else 0.0
        # `max()` raises on an empty iterable, so outputs without any "hidden" entry need the same guard.
        max_hidden_diff = max(hidden_differences.values()) if hidden_differences else 0.0
        if max_output_diff > self._max_error or max_hidden_diff > self._max_error:
            raise ValueError(
                f"The {model_description} TensorFlow model has different outputs, something went wrong!\n"
                + f"\nList of maximum output differences above the threshold ({self._max_error}):\n"
                + "\n".join([f"{k}: {v:.3e}" for k, v in output_differences.items() if v > self._max_error])
                + f"\n\nList of maximum hidden layer differences above the threshold ({self._max_error}):\n"
                + "\n".join([f"{k}: {v:.3e}" for k, v in hidden_differences.items() if v > self._max_error])
            )
        return max_output_diff, max_hidden_diff

    def run(self):
        """
        Run the conversion: validate the cross-loaded model, save and re-validate the TensorFlow weights, then
        push them or open a PR according to the command options.

        Raises:
            ImportError: If the installed `huggingface_hub` is older than 0.9.0.
            ValueError: If the model class cannot be resolved, more than one architecture is configured, or the
                TensorFlow outputs differ from the PyTorch outputs by more than the tolerance.
            AttributeError: If the detected architecture has no `TF`-prefixed class in `transformers`.
        """
        # The hub helpers imported below are only used with huggingface_hub >= 0.9.0.
        if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"):
            raise ImportError(
                "The huggingface_hub version must be >= 0.9.0 to use this command. Please update your huggingface_hub"
                " installation."
            )
        from huggingface_hub import Repository, create_commit
        from huggingface_hub._commit_api import CommitOperationAdd

        # Local working copy of the model repository, created with `clone_from` set to the hub model name.
        repo = Repository(local_dir=self._local_dir, clone_from=self._model_name)

        # Resolve the PyTorch class and its TensorFlow counterpart (same name with a "TF" prefix).
        transformers_module = import_module("transformers")
        config = AutoConfig.from_pretrained(self._local_dir)
        architectures = config.architectures
        if self._override_model_class is not None:
            # Accept the override with or without the "TF" prefix.
            if self._override_model_class.startswith("TF"):
                architectures = [self._override_model_class[2:]]
            else:
                architectures = [self._override_model_class]
            try:
                pt_class = getattr(transformers_module, architectures[0])
            except AttributeError as err:
                raise ValueError(f"Model class {architectures[0]} not found in transformers.") from err
            try:
                tf_class = getattr(transformers_module, "TF" + architectures[0])
            except AttributeError as err:
                raise ValueError(f"TF model class TF{architectures[0]} not found in transformers.") from err
        elif architectures is None:
            pt_class = getattr(transformers_module, "AutoModel")
            tf_class = getattr(transformers_module, "TFAutoModel")
            self._logger.warning("No detected architecture, using AutoModel/TFAutoModel")
        else:
            if len(architectures) > 1:
                raise ValueError(f"More than one architecture was found, aborting. (architectures = {architectures})")
            self._logger.warning(f"Detected architecture: {architectures[0]}")
            pt_class = getattr(transformers_module, architectures[0])
            try:
                tf_class = getattr(transformers_module, "TF" + architectures[0])
            except AttributeError as err:
                raise AttributeError(
                    f"The TensorFlow equivalent of {architectures[0]} doesn't exist in transformers."
                ) from err

        # A TensorFlow model built from the config alone is only needed for its dummy inputs; drop it right away.
        tf_from_pt_model = tf_class.from_config(config)
        tf_dummy_inputs = tf_from_pt_model.dummy_inputs
        del tf_from_pt_model

        # Reference outputs from the PyTorch model, in eval mode and without gradient tracking.
        pt_model = pt_class.from_pretrained(self._local_dir)
        pt_model.eval()
        pt_input, tf_input = self.get_inputs(pt_model, tf_dummy_inputs, config)
        with torch.no_grad():
            pt_outputs = pt_model(**pt_input, output_hidden_states=True)
        del pt_model

        # First check: TensorFlow model cross-loaded from the PyTorch weights (`from_pt=True`).
        tf_from_pt_model = tf_class.from_pretrained(self._local_dir, from_pt=True)
        tf_from_pt_outputs = tf_from_pt_model(**tf_input, output_hidden_states=True, training=False)
        crossload_differences = self.find_pt_tf_differences(pt_outputs, tf_from_pt_outputs)
        max_crossload_output_diff, max_crossload_hidden_diff = self._validate_differences(
            crossload_differences, architectures, "cross-loaded"
        )

        # Save the TensorFlow weights unless they already exist (single file or sharded index) and no refresh
        # was requested.
        tf_weights_path = os.path.join(self._local_dir, TF2_WEIGHTS_NAME)
        tf_weights_index_path = os.path.join(self._local_dir, TF2_WEIGHTS_INDEX_NAME)
        if (not os.path.exists(tf_weights_path) and not os.path.exists(tf_weights_index_path)) or self._new_weights:
            tf_from_pt_model.save_pretrained(self._local_dir)
        del tf_from_pt_model

        # Second check: TensorFlow model loaded from the TensorFlow weights stored in the local directory.
        tf_model = tf_class.from_pretrained(self._local_dir)
        tf_outputs = tf_model(**tf_input, output_hidden_states=True)
        conversion_differences = self.find_pt_tf_differences(pt_outputs, tf_outputs)
        max_conversion_output_diff, max_conversion_hidden_diff = self._validate_differences(
            conversion_differences, architectures, "converted"
        )

        commit_message = "Update TF weights" if self._new_weights else "Add TF weights"
        if self._push:
            repo.git_add(auto_lfs_track=True)
            repo.git_commit(commit_message)
            repo.git_push(blocking=True)
            self._logger.warning(f"TF weights pushed into {self._model_name}")
        elif not self._no_pr:
            self._logger.warning("Uploading the weights into a new PR...")
            commit_description = (
                "Model converted by the [`transformers`' `pt_to_tf`"
                " CLI](https://github.com/huggingface/transformers/blob/main/src/transformers/commands/pt_to_tf.py). "
                "All converted model outputs and hidden layers were validated against its PyTorch counterpart.\n\n"
                f"Maximum crossload output difference={max_crossload_output_diff:.3e}; "
                f"Maximum crossload hidden layer difference={max_crossload_hidden_diff:.3e};\n"
                f"Maximum conversion output difference={max_conversion_output_diff:.3e}; "
                f"Maximum conversion hidden layer difference={max_conversion_hidden_diff:.3e};\n"
            )
            # Make it visible in the PR when the tolerance was loosened beyond the default.
            if self._max_error > MAX_ERROR:
                commit_description += (
                    f"\n\nCAUTION: The maximum admissible error was manually increased to {self._max_error}!"
                )
            if self._extra_commit_description:
                commit_description += "\n\n" + self._extra_commit_description

            # Sharded weights: upload the index plus every shard file; otherwise upload the single weights file.
            if os.path.exists(tf_weights_index_path):
                operations = [
                    CommitOperationAdd(path_in_repo=TF2_WEIGHTS_INDEX_NAME, path_or_fileobj=tf_weights_index_path)
                ]
                for shard_path in tf.io.gfile.glob(os.path.join(self._local_dir, "tf_model-*.h5")):
                    operations.append(
                        CommitOperationAdd(path_in_repo=os.path.basename(shard_path), path_or_fileobj=shard_path)
                    )
            else:
                operations = [CommitOperationAdd(path_in_repo=TF2_WEIGHTS_NAME, path_or_fileobj=tf_weights_path)]

            hub_pr_url = create_commit(
                repo_id=self._model_name,
                operations=operations,
                commit_message=commit_message,
                commit_description=commit_description,
                repo_type="model",
                create_pr=True,
            ).pr_url
            self._logger.warning(f"PR open in {hub_pr_url}")
|
|