|
|
""" |
|
|
Script to upload the email classification model to Hugging Face Hub |
|
|
""" |
|
|
|
|
|
import sys |
|
|
import argparse |
|
|
import subprocess |
|
|
import pkg_resources |
|
|
|
|
|
|
|
|
def check_and_install_dependencies(required_packages=None):
    """Check for required libraries and install any that are missing.

    Args:
        required_packages: Optional iterable of distribution names to look
            for. Defaults to ``torch``, ``transformers`` and
            ``sentencepiece``.

    Returns:
        True when every required package is already installed. False when
        packages were missing: either they were just installed (a message
        tells the user the script may need to be restarted) or the
        ``pip install`` command failed.
    """
    if required_packages is None:
        required_packages = ['torch', 'transformers', 'sentencepiece']
    required = list(required_packages)

    try:
        # importlib.metadata is the standard-library replacement for the
        # deprecated pkg_resources API.
        from importlib import metadata
    except ImportError:  # Python < 3.8: fall back to the setuptools API
        installed_packages = {pkg.key for pkg in pkg_resources.working_set}
        missing_packages = [pkg for pkg in required
                            if pkg not in installed_packages]
    else:
        missing_packages = []
        for pkg in required:
            try:
                metadata.distribution(pkg)
            except metadata.PackageNotFoundError:
                missing_packages.append(pkg)

    if not missing_packages:
        return True

    missing_packages_str = ", ".join(missing_packages)
    # Flush so this line is emitted before pip (a child process writing to
    # the same stream) produces its own output.
    print(f"Installing missing dependencies: {missing_packages_str}",
          flush=True)
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install"] + missing_packages)
    except subprocess.CalledProcessError as err:
        # Report the failure instead of letting the traceback escape; the
        # False return value already means "do not continue".
        print("Failed to install dependencies "
              f"(pip exited with status {err.returncode}).")
        return False

    print("Dependencies installed. You may need to restart the script.")
    return False
|
|
|
|
|
|
|
|
def get_huggingface_username(token=None):
    """Look up the account name of the authenticated Hugging Face user.

    Args:
        token: Optional API token handed to ``HfApi``.

    Returns:
        The value stored under ``'name'`` in the ``whoami()`` response, or
        None when the lookup fails for any reason (the error is printed).
    """
    try:
        # Imported lazily so a missing huggingface_hub package is reported
        # through the same error path as a failed lookup.
        from huggingface_hub import HfApi

        return HfApi(token=token).whoami().get('name')
    except Exception as err:
        print(f"Error getting Hugging Face username: {err}")
    return None
|
|
|
|
|
|
|
|
def _build_arg_parser():
    """Create the argument parser describing the script's command-line options."""
    parser = argparse.ArgumentParser(
        description="Upload email classification model to Hugging Face Hub")
    parser.add_argument(
        "--model_path", type=str, default="classification_model",
        help="Local path to the model files")
    parser.add_argument(
        "--hub_model_id", type=str,
        help="Hugging Face Hub model ID (e.g., "
             "'username/email-classifier-model')")
    parser.add_argument(
        "--model_name", type=str, default="email-classifier-model",
        help="Name for the model repository "
             "(default: email-classifier-model)")
    parser.add_argument(
        "--token", type=str,
        help="Hugging Face API token (optional, can use "
             "environment variable or huggingface-cli login)")
    return parser


def main(argv=None):
    """Upload model to Hugging Face Hub.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            reads the arguments from ``sys.argv``.

    Returns:
        0 when the model and tokenizer were pushed to the Hub, 1 when the
        run stopped early (dependencies missing or just installed, username
        could not be determined, or the upload raised an error).
    """
    # Parse the command line first so that --help and usage errors exit
    # before the dependency check (which may run pip) and the heavy imports.
    args = _build_arg_parser().parse_args(argv)

    if not check_and_install_dependencies():
        return 1

    # Imported only after the dependency check has passed.
    from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
    from huggingface_hub import login

    if args.token:
        login(token=args.token)

    # Without an explicit repository ID, build one from the authenticated
    # user's name and --model_name.
    if not args.hub_model_id:
        username = get_huggingface_username(args.token)
        if not username:
            print("Could not determine Hugging Face username. "
                  "Please provide --hub_model_id explicitly.")
            return 1
        args.hub_model_id = f"{username}/{args.model_name}"

    print(f"Loading model from {args.model_path}...")

    model = XLMRobertaForSequenceClassification.from_pretrained(args.model_path)
    tokenizer = XLMRobertaTokenizer.from_pretrained(args.model_path)

    print(f"Uploading model to {args.hub_model_id}...")
    try:
        model.push_to_hub(args.hub_model_id)
        tokenizer.push_to_hub(args.hub_model_id)
    except Exception as e:
        # Top-level boundary: show the error with troubleshooting hints and
        # signal the failure through the return value.
        print(f"Error uploading model: {e}")
        print("\nPossible solutions:")
        print("1. Make sure you're logged in with 'huggingface-cli login'")
        print("2. Check that you have permission to create repos in the "
              "specified namespace")
        print("3. Try using your own username: "
              "--hub_model_id yourusername/email-classifier-model")
        return 1

    print("Model successfully uploaded to Hugging Face Hub!")
    print(f"You can now use the model with the ID: {args.hub_model_id}")
    print(f"Update the MODEL_PATH in Dockerfile to: {args.hub_model_id}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shells and CI can detect a failed upload.
    sys.exit(main())
|
|
|