File size: 4,036 Bytes
b7c31ab
 
 
 
 
 
 
 
 
edc8356
b7c31ab
 
 
 
edc8356
b7c31ab
edc8356
b7c31ab
edc8356
 
 
 
b7c31ab
 
edc8356
b7c31ab
 
edc8356
b7c31ab
 
 
 
 
 
 
 
 
 
 
edc8356
b7c31ab
 
 
 
 
edc8356
b7c31ab
 
 
edc8356
 
 
b7c31ab
 
 
edc8356
 
b7c31ab
edc8356
 
b7c31ab
edc8356
 
 
b7c31ab
edc8356
b7c31ab
 
 
edc8356
b7c31ab
 
 
 
edc8356
 
b7c31ab
 
edc8356
b7c31ab
 
 
 
edc8356
b7c31ab
 
 
 
 
edc8356
b7c31ab
 
 
 
 
 
 
edc8356
 
 
 
 
b7c31ab
 
edc8356
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
Script to upload the email classification model to Hugging Face Hub
"""

import sys
import argparse
import subprocess
import pkg_resources


def check_and_install_dependencies(required_packages=None):
    """Check for required libraries and install any that are missing.

    Installed distributions are detected with ``importlib.metadata`` from the
    standard library instead of the deprecated ``pkg_resources`` API, which
    is only available when setuptools happens to be installed.

    Args:
        required_packages: Optional iterable of distribution names to
            verify. When ``None`` (the default) the script's own
            requirements are used: ``torch``, ``transformers`` and
            ``sentencepiece``.

    Returns:
        True when every required package is already installed. False when
        at least one package was missing and has just been installed with
        pip; the caller is expected to stop so the user can re-run the
        script with the new packages available.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
    """
    # Local import: keeps this function self-contained and avoids relying on
    # a file-level import of a setuptools-provided module.
    from importlib import metadata

    if required_packages is None:
        required_packages = ('torch', 'transformers', 'sentencepiece')

    missing_packages = []
    for pkg in required_packages:
        try:
            # Raises PackageNotFoundError when no such distribution exists.
            metadata.version(pkg)
        except metadata.PackageNotFoundError:
            missing_packages.append(pkg)

    if not missing_packages:
        return True

    missing_packages_str = ", ".join(missing_packages)
    print(f"Installing missing dependencies: {missing_packages_str}")
    # Use the running interpreter's pip so packages land in the same
    # environment this script is executing in.
    subprocess.check_call([sys.executable, "-m", "pip", "install"]
                          + missing_packages)
    print("Dependencies installed. You may need to restart the script.")
    return False


def get_huggingface_username(token=None):
    """Look up the account name of the authenticated Hugging Face user.

    Args:
        token: Optional API token passed to ``HfApi``. When ``None``, no
            explicit token is supplied to the client.

    Returns:
        The value stored under ``'name'`` in the ``whoami()`` response, or
        ``None`` if anything goes wrong (the error is printed, not raised).
    """
    username = None
    try:
        # Imported lazily so a missing huggingface_hub package is reported
        # through the same error path as a failed API call.
        from huggingface_hub import HfApi
        username = HfApi(token=token).whoami().get('name')
    except Exception as err:
        print(f"Error getting Hugging Face username: {err}")
    return username


def _build_arg_parser():
    """Create the command-line parser for the upload script."""
    parser = argparse.ArgumentParser(
        description="Upload email classification model to Hugging Face Hub")
    parser.add_argument("--model_path", type=str, default="classification_model",
                        help="Local path to the model files")
    parser.add_argument("--hub_model_id", type=str,
                        help="Hugging Face Hub model ID (e.g., "
                             "'username/email-classifier-model')")
    parser.add_argument("--model_name", type=str, default="email-classifier-model",
                        help="Name for the model repository "
                             "(default: email-classifier-model)")
    parser.add_argument("--token", type=str,
                        help="Hugging Face API token (optional, can use "
                             "environment variable or huggingface-cli login)")
    return parser


def main():
    """Upload a local model and tokenizer to the Hugging Face Hub.

    Steps, in order:

    1. Parse the command-line arguments.
    2. Verify the required packages; if any had to be installed, return so
       the user can re-run the script.
    3. Log in with ``--token`` when one is given.
    4. If ``--hub_model_id`` is absent, build it as
       ``<username>/<--model_name>`` from the authenticated account.
    5. Load the model and tokenizer from ``--model_path`` and push both to
       the Hub, printing troubleshooting hints if the upload fails.
    """
    # Parse arguments before touching dependencies so that ``--help`` and
    # usage errors never trigger a pip install or the slow heavy imports.
    args = _build_arg_parser().parse_args()

    # Check dependencies before importing them
    if not check_and_install_dependencies():
        return

    # Import dependencies after installation check
    from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
    from huggingface_hub import login

    # Login if token is provided
    if args.token:
        login(token=args.token)

    # If hub_model_id is not provided, try to get username and construct it
    if not args.hub_model_id:
        username = get_huggingface_username(args.token)
        if not username:
            print("Could not determine Hugging Face username. "
                  "Please provide --hub_model_id explicitly.")
            return
        args.hub_model_id = f"{username}/{args.model_name}"

    print(f"Loading model from {args.model_path}...")
    # Load the local model and tokenizer
    model = XLMRobertaForSequenceClassification.from_pretrained(args.model_path)
    tokenizer = XLMRobertaTokenizer.from_pretrained(args.model_path)

    print(f"Uploading model to {args.hub_model_id}...")
    try:
        # Push to Hugging Face Hub
        model.push_to_hub(args.hub_model_id)
        tokenizer.push_to_hub(args.hub_model_id)

        print("Model successfully uploaded to Hugging Face Hub!")
        print(f"You can now use the model with the ID: {args.hub_model_id}")
        print(f"Update the MODEL_PATH in Dockerfile to: {args.hub_model_id}")
    except Exception as e:
        # Best-effort reporting: show the error plus common remedies instead
        # of a raw traceback.
        print(f"Error uploading model: {e}")
        print("\nPossible solutions:")
        print("1. Make sure you're logged in with 'huggingface-cli login'")
        print("2. Check that you have permission to create repos in the "
              "specified namespace")
        print("3. Try using your own username: "
              "--hub_model_id yourusername/email-classifier-model")


# Run the upload only when this file is executed as a script, so importing
# the module does not parse arguments or start an upload.
if __name__ == "__main__":
    main()