File size: 4,036 Bytes
b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 b7c31ab edc8356 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
"""
Script to upload the email classification model to Hugging Face Hub
"""
import sys
import argparse
import subprocess
import pkg_resources
def check_and_install_dependencies():
"""Check for required libraries and install if missing"""
required_packages = ['torch', 'transformers', 'sentencepiece']
installed_packages = {pkg.key for pkg in pkg_resources.working_set}
missing_packages = [pkg for pkg in required_packages if pkg not in installed_packages]
if missing_packages:
missing_packages_str = ", ".join(missing_packages)
print(f"Installing missing dependencies: {missing_packages_str}")
subprocess.check_call([sys.executable, "-m", "pip", "install"]
+ missing_packages)
print("Dependencies installed. You may need to restart the script.")
return False
return True
def get_huggingface_username(token=None):
"""Get the username for the authenticated user"""
try:
from huggingface_hub import HfApi
api = HfApi(token=token)
user_info = api.whoami()
return user_info.get('name')
except Exception as e:
print(f"Error getting Hugging Face username: {e}")
return None
def main():
"""Upload model to Hugging Face Hub"""
# Check dependencies first
if not check_and_install_dependencies():
return
# Import dependencies after installation check
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from huggingface_hub import login
parser = argparse.ArgumentParser(
description="Upload email classification model to Hugging Face Hub")
parser.add_argument("--model_path", type=str, default="classification_model",
help="Local path to the model files")
parser.add_argument("--hub_model_id", type=str,
help="Hugging Face Hub model ID (e.g., "
"'username/email-classifier-model')")
parser.add_argument("--model_name", type=str, default="email-classifier-model",
help="Name for the model repository "
"(default: email-classifier-model)")
parser.add_argument("--token", type=str,
help="Hugging Face API token (optional, can use "
"environment variable or huggingface-cli login)")
args = parser.parse_args()
# Login if token is provided
if args.token:
login(token=args.token)
# If hub_model_id is not provided, try to get username and construct it
if not args.hub_model_id:
username = get_huggingface_username(args.token)
if not username:
print("Could not determine Hugging Face username. "
"Please provide --hub_model_id explicitly.")
return
args.hub_model_id = f"{username}/{args.model_name}"
print(f"Loading model from {args.model_path}...")
# Load the local model and tokenizer
model = XLMRobertaForSequenceClassification.from_pretrained(args.model_path)
tokenizer = XLMRobertaTokenizer.from_pretrained(args.model_path)
print(f"Uploading model to {args.hub_model_id}...")
try:
# Push to Hugging Face Hub
model.push_to_hub(args.hub_model_id)
tokenizer.push_to_hub(args.hub_model_id)
print("Model successfully uploaded to Hugging Face Hub!")
print(f"You can now use the model with the ID: {args.hub_model_id}")
print(f"Update the MODEL_PATH in Dockerfile to: {args.hub_model_id}")
except Exception as e:
print(f"Error uploading model: {e}")
print("\nPossible solutions:")
print("1. Make sure you're logged in with 'huggingface-cli login'")
print("2. Check that you have permission to create repos in the "
"specified namespace")
print("3. Try using your own username: "
"--hub_model_id yourusername/email-classifier-model")
if __name__ == "__main__":
main()
|