# upload_model.py
"""Upload a fine-tuned TensorFlow sequence-classification model to the Hugging Face Hub.

The script runs top to bottom:

1. Read the user-editable settings below.
2. Load the tokenizer and model architecture for ``MODEL_ARCH``.
3. Build the model with one dummy forward pass, then load the weights
   stored in ``H5_WEIGHTS_PATH``.
4. Push both the model and the tokenizer to ``HUB_REPO_ID``.

NOTE(review): pushing presumably requires being authenticated with the Hub
(e.g. via ``huggingface-cli login``) -- confirm for your environment.
"""
import os

import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# --- 1. EDIT THESE VARIABLES ---
# The base model architecture you used, e.g., 'bert-base-uncased', 'distilbert-base-cased'
MODEL_ARCH = 'bert-base-uncased'
# Path to your saved .h5 file
H5_WEIGHTS_PATH = './my_model.h5'
# The name for your new repository on the Hugging Face Hub
HUB_REPO_ID = "your-hf-username/your-model-name"
# The number of labels your model was trained to predict
NUM_LABELS = 2  # Example for binary classification

# Fail fast: verify the local weights file before downloading anything, so a
# wrong path is reported immediately instead of after the model download.
if not os.path.isfile(H5_WEIGHTS_PATH):
    raise FileNotFoundError(f"Weights file not found: {H5_WEIGHTS_PATH}")

# --- 2. LOAD THE TRANSFORMERS MODEL AND TOKENIZER ---
print("Loading base tokenizer and model architecture...")
# Load the tokenizer that corresponds to your model architecture
tokenizer = AutoTokenizer.from_pretrained(MODEL_ARCH)
# Load the TensorFlow model class with the requested number of classes.
# from_pt=True asks Transformers to initialise it from the PyTorch checkpoint
# of MODEL_ARCH (this typically needs PyTorch installed -- TODO confirm).
# These initial weights are replaced by the .h5 weights in step 3.
model = TFAutoModelForSequenceClassification.from_pretrained(
    MODEL_ARCH,
    num_labels=NUM_LABELS,
    from_pt=True,
)

# --- 3. LOAD WEIGHTS FROM YOUR .H5 FILE ---
# NOTE: The model must be "built" before loading weights.
# A simple way to do this is to pass a dummy input through it.
dummy_input = tokenizer("This is a dummy sentence.", return_tensors="tf")
_ = model(dummy_input)  # The output of this call is not needed
print(f"Loading weights from {H5_WEIGHTS_PATH}...")
model.load_weights(H5_WEIGHTS_PATH)
print("Weights loaded successfully.")

# --- 4. PUSH THE MODEL AND TOKENIZER TO THE HUB ---
print(f"Uploading model and tokenizer to {HUB_REPO_ID}...")
# This command will create the repository if it doesn't exist
model.push_to_hub(HUB_REPO_ID)
# Push the tokenizer to the same repository so the two are stored together.
tokenizer.push_to_hub(HUB_REPO_ID)
print("All done! Your model is now on the Hugging Face Hub.")