File size: 1,735 Bytes

9cdf343


# upload_model.py

from pathlib import Path

import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# --- 1. EDIT THESE VARIABLES ---
# The base model architecture the fine-tuned weights belong to,
# e.g. 'bert-base-uncased', 'distilbert-base-cased'.
MODEL_ARCH = 'bert-base-uncased'
# Path to your saved .h5 weights file.
H5_WEIGHTS_PATH = './my_model.h5'
# The name for your new repository on the Hugging Face Hub.
HUB_REPO_ID = "your-hf-username/your-model-name"
# The number of labels your model was trained to predict.
NUM_LABELS = 2  # Example for binary classification

# Fail fast: verify the weights file exists *before* downloading the base
# model, so a mistyped path is reported immediately with a clear message
# instead of surfacing later as a low-level error from load_weights().
if not Path(H5_WEIGHTS_PATH).is_file():
    raise FileNotFoundError(
        f"Weights file not found: {H5_WEIGHTS_PATH!r}. "
        "Set H5_WEIGHTS_PATH to the .h5 file you saved after training."
    )

# --- 2. LOAD THE TRANSFORMERS MODEL AND TOKENIZER ---
print("Loading base tokenizer and model architecture...")
# Load the tokenizer that corresponds to the base architecture.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ARCH)

# Instantiate the TensorFlow classification model with NUM_LABELS outputs.
# from_pt=True asks transformers to initialise it from the PyTorch checkpoint
# of MODEL_ARCH. NOTE(review): this presumably requires PyTorch to be
# installed; these initial weights are replaced by the .h5 file in step 3.
model = TFAutoModelForSequenceClassification.from_pretrained(
    MODEL_ARCH,
    num_labels=NUM_LABELS,
    from_pt=True,
)

# --- 3. LOAD WEIGHTS FROM YOUR .H5 FILE ---
# NOTE: The model must be "built" before loading weights.
# A simple way to do this is to pass a dummy input through it.
dummy_input = tokenizer("This is a dummy sentence.", return_tensors="tf")
_ = model(dummy_input)  # The output of this call is not needed

print(f"Loading weights from {H5_WEIGHTS_PATH}...")
model.load_weights(H5_WEIGHTS_PATH)
print("Weights loaded successfully.")

# --- 4. PUSH THE MODEL AND TOKENIZER TO THE HUB ---
print(f"Uploading model and tokenizer to {HUB_REPO_ID}...")
# This command will create the repository if it doesn't exist.
# The model is pushed first, then the tokenizer, both to the same repository.
model.push_to_hub(HUB_REPO_ID)
tokenizer.push_to_hub(HUB_REPO_ID)

print("All done! Your model is now on the Hugging Face Hub.")