import os
from typing import Optional

import accelerate
import einops
import huggingface_hub
import torch
import transformers
from jinja2 import Environment, FileSystemLoader

from llm_studio.app_utils.sections.chat import load_cfg_model_tokenizer
from llm_studio.app_utils.utils import hf_repo_friendly_name, save_hf_yaml, set_env
from llm_studio.src.utils.config_utils import NON_GENERATION_PROBLEM_TYPES
from llm_studio.src.utils.modeling_utils import check_disk_space
|
|
|
|
def get_model_card(cfg, model, repo_id) -> huggingface_hub.ModelCard:
    """
    Build the Model Card for a Hugging Face model repository.

    The card's language, library name, and tags appear in the Model Card tab
    on Hugging Face and can be adjusted here.

    Parameters:
        cfg : Configuration parameters for the model card.
        model : The model for which the model card is being generated.
        repo_id : The ID of the target Hugging Face repository.

    Returns:
        huggingface_hub.ModelCard: The Model Card containing model information.
    """
    card_data = huggingface_hub.ModelCardData(
        language="en",
        library_name="transformers",
        tags=["gpt", "llm", "large language model", "h2o-llmstudio"],
    )
    # Template kwargs shared by all problem types.
    cfg_kwargs = dict(
        use_fast=cfg.tokenizer.use_fast,
        text_prompt_start=cfg.dataset.text_prompt_start,
        text_answer_separator=cfg.dataset.text_answer_separator,
        trust_remote_code=cfg.environment.trust_remote_code,
        end_of_sentence=(
            cfg._tokenizer_eos_token if cfg.dataset.add_eos_token_to_prompt else ""
        ),
    )
    # Generation parameters only make sense for generative problem types.
    if cfg.problem_type not in NON_GENERATION_PROBLEM_TYPES:
        cfg_kwargs.update(
            dict(
                min_new_tokens=cfg.prediction.min_length_inference,
                max_new_tokens=cfg.prediction.max_length_inference,
                do_sample=cfg.prediction.do_sample,
                num_beams=cfg.prediction.num_beams,
                temperature=cfg.prediction.temperature,
                repetition_penalty=cfg.prediction.repetition_penalty,
            )
        )

    card = huggingface_hub.ModelCard.from_template(
        card_data,
        template_path=os.path.join("model_cards", cfg.environment._model_card_template),
        base_model=cfg.llm_backbone,
        repo_id=repo_id,
        # Use the repr() builtin rather than calling __repr__() directly.
        model_architecture=repr(model.backbone),
        config=repr(cfg),
        transformers_version=transformers.__version__,
        einops_version=einops.__version__,
        accelerate_version=accelerate.__version__,
        # Strip any local build suffix, e.g. "2.1.0+cu118" -> "2.1.0".
        torch_version=torch.__version__.split("+")[0],
        **cfg_kwargs,
    )
    return card
|
|
|
|
def publish_model_to_hugging_face(
    path_to_experiment: str,
    model_name: str,
    user_id: Optional[str] = None,
    api_key: Optional[str] = None,
    device: str = "cuda:0",
    safe_serialization: bool = True,
) -> None:
    """
    Publish a fine-tuned model to Hugging Face.

    Uploads the tokenizer, model card, weights, config, optional
    classification head, and a rendered custom pipeline file to a private
    repository named ``{user_id}/{model_name}``.

    Parameters:
        path_to_experiment: The file path of the fine-tuned model's files.
        model_name: The name of the model to be published on Hugging Face.
        user_id: The Hugging Face user ID. If empty or None, it is resolved
            from the logged-in account via ``whoami()``.
        api_key: The Hugging Face API Key.
        device: The target device for running the model, either 'cpu',
            'cpu_shard' or 'cuda:INTEGER'.
        safe_serialization: A flag indicating whether safe serialization
            should be used.

    Raises:
        ValueError: If ``device`` is not 'cpu', 'cpu_shard' or 'cuda:INTEGER'.

    Returns:
        None. The model is published to the specified Hugging Face repository.
    """
    # Validate the target device up front before any expensive work.
    is_cpu_device = device in ("cpu", "cpu_shard")
    is_cuda_device = device.startswith("cuda:") and device[5:].isdigit()
    if not (is_cpu_device or is_cuda_device):
        raise ValueError(
            "Invalid device value. Use 'cpu', 'cpu_shard' or 'cuda:INTEGER'."
        )

    # Load the experiment config, merged model, and tokenizer with the HF
    # token exposed in the environment for gated/private backbones.
    with set_env(HUGGINGFACE_TOKEN=api_key):
        cfg, model, tokenizer = load_cfg_model_tokenizer(
            path_to_experiment,
            merge=True,
            device=device,
        )

    check_disk_space(model.backbone, "./")

    if api_key:
        huggingface_hub.login(api_key)

    # Fall back to the logged-in account when no user id was provided.
    # NOTE: previously only `user_id == ""` triggered this fallback, so the
    # default `None` produced a bogus "None/<model>" repo id.
    if not user_id:
        user_id = huggingface_hub.whoami()["name"]

    repo_id = f"{user_id}/{hf_repo_friendly_name(model_name)}"

    # Push the tokenizer to a (new, private) repository.
    tokenizer.push_to_hub(repo_id=repo_id, private=True)

    # Generate and push the model card.
    card = get_model_card(cfg, model, repo_id)
    card.push_to_hub(
        repo_id=repo_id, repo_type="model", commit_message="Upload model card"
    )

    api = huggingface_hub.HfApi()

    # Upload the classification head when the experiment produced one.
    if os.path.isfile(f"{path_to_experiment}/classification_head.pth"):
        api.upload_file(
            path_or_fileobj=f"{path_to_experiment}/classification_head.pth",
            path_in_repo="classification_head.pth",
            repo_id=repo_id,
            repo_type="model",
            commit_message="Upload classification_head.pth",
        )

    # Upload the experiment configuration.
    api.upload_file(
        path_or_fileobj=os.path.join(path_to_experiment, "cfg.yaml"),
        path_in_repo="cfg.yaml",
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload cfg.yaml",
    )

    # Register the custom text-generation pipeline so `pipeline(...)` with
    # trust_remote_code picks up the uploaded h2oai_pipeline module.
    model.backbone.config.custom_pipelines = {
        "text-generation": {
            "impl": "h2oai_pipeline.H2OTextGenerationPipeline",
            "pt": "AutoModelForCausalLM",
        }
    }

    model.backbone.push_to_hub(
        repo_id=repo_id,
        private=True,
        commit_message="Upload model",
        safe_serialization=safe_serialization,
    )

    # Persist repo metadata locally next to the experiment output.
    output_directory = cfg.output_directory
    save_hf_yaml(
        path=f"{output_directory.rstrip('/')}/hf.yaml",
        account_name=user_id,
        model_name=model_name,
        repo_id=repo_id,
    )

    # Render the custom pipeline file from the project template with the
    # experiment's prompt formatting settings.
    template_env = Environment(loader=FileSystemLoader(searchpath="llm_studio/src/"))
    pipeline_template = template_env.get_template("h2oai_pipeline_template.py")

    data = {
        "text_prompt_start": cfg.dataset.text_prompt_start,
        "text_answer_separator": cfg.dataset.text_answer_separator,
        "end_of_sentence": (
            cfg._tokenizer_eos_token if cfg.dataset.add_eos_token_to_prompt else ""
        ),
    }

    custom_pipeline = pipeline_template.render(data)
    custom_pipeline_path = os.path.join(path_to_experiment, "h2oai_pipeline.py")
    with open(custom_pipeline_path, "w") as f:
        f.write(custom_pipeline)

    api.upload_file(
        path_or_fileobj=custom_pipeline_path,
        path_in_repo="h2oai_pipeline.py",
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload h2oai_pipeline.py",
    )
|
|