# (Scraped Hugging Face page residue, kept as a comment) Spaces status: Sleeping
| #!/usr/bin/env python | |
| """ | |
| Quick script to update your Hugging Face Space for phi-4-unsloth-bnb-4bit training. | |
| This script handles the specific requirements for the 4-bit quantized Phi-4 model training, | |
| including proper configuration and dependency management. | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import subprocess | |
| import argparse | |
| import logging | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, login | |
| import getpass | |
# Configure logging: INFO-level messages formatted with a timestamp, sent to
# stdout so they show up both locally and in Space container logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)
def load_env_variables():
    """Load environment variables from the system or a local .env file.

    Resolution order: a `.env` file next to this script (if python-dotenv is
    installed), then the process environment. When running inside a Hugging
    Face Space, HF_USERNAME is derived from SPACE_ID; HF_SPACE_NAME defaults
    to "phi4training".

    Returns:
        dict: {"HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME"} with resolved values.

    Raises:
        ValueError: If HF_TOKEN or HF_USERNAME cannot be resolved.
    """
    # First try to load from a local .env file. The dotenv import is only a
    # probe for the optional dependency; the file is parsed manually so the
    # variables are explicitly set on os.environ.
    try:
        from dotenv import load_dotenv  # noqa: F401 -- presence check only
        env_path = Path(__file__).parent / ".env"
        if env_path.exists():
            with open(env_path) as f:
                for line in f:
                    stripped = line.strip()
                    # Skip blanks, comments (even indented ones), and
                    # malformed lines without '=' instead of crashing on
                    # the 2-way unpack of split('=', 1).
                    if not stripped or stripped.startswith('#') or '=' not in stripped:
                        continue
                    key, value = stripped.split('=', 1)
                    os.environ[key.strip()] = value.strip()
            logger.info(f"Loaded environment variables from {env_path}")
        else:
            logger.warning(f"No .env file found at {env_path}")
    except ImportError:
        logger.warning("python-dotenv not installed, skipping .env loading")

    # Check if we're running in a Hugging Face Space; SPACE_ID looks like
    # "username/space-name", so the username can be recovered from it.
    if os.environ.get("SPACE_ID"):
        logger.info("Running in Hugging Face Space")
        if "/" in os.environ.get("SPACE_ID", ""):
            username = os.environ.get("SPACE_ID").split("/")[0]
            os.environ["HF_USERNAME"] = username
            logger.info(f"Set HF_USERNAME from SPACE_ID: {username}")

    # Verify required variables (HF_SPACE_NAME has a safe default).
    required_vars = {
        "HF_TOKEN": os.environ.get("HF_TOKEN"),
        "HF_USERNAME": os.environ.get("HF_USERNAME"),
        "HF_SPACE_NAME": os.environ.get("HF_SPACE_NAME", "phi4training")
    }

    # Ensure the space name is set correctly for downstream consumers.
    if "HF_SPACE_NAME" not in os.environ:
        os.environ["HF_SPACE_NAME"] = "phi4training"

    missing_vars = [k for k, v in required_vars.items() if not v]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    logger.info(f"Using environment variables: USERNAME={required_vars['HF_USERNAME']}, SPACE_NAME={required_vars['HF_SPACE_NAME']}")
    return required_vars
def verify_configs():
    """Ensure every required config file exists and that JSON files parse.

    Raises:
        FileNotFoundError: If any required file is absent.
        ValueError: If a required JSON file is not valid JSON.
    """
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "requirements.txt",
        "run_transformers_training.py"
    ]

    # Collect all missing files first so the error reports them together.
    missing_files = [name for name in required_files if not (current_dir / name).exists()]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Parse each JSON config to confirm it is well-formed.
    for json_file in (name for name in required_files if name.endswith('.json')):
        try:
            with open(current_dir / json_file) as f:
                json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}")
        logger.info(f"Verified {json_file} is valid JSON")
def update_requirements():
    """Update requirements.txt with the packages Phi-4 training needs.

    Existing entries are preserved, but entries are merged by package name:
    when an existing line names the same package as one of the required pins
    (e.g. a bare "torch" vs "torch>=2.0.0"), the required pin replaces it.
    The previous set-union approach kept both lines, leaving conflicting
    duplicate requirements in the file.
    """
    current_dir = Path(__file__).parent
    req_path = current_dir / "requirements.txt"

    required_packages = {
        "torch>=2.0.0",
        "transformers>=4.36.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.41.0",
        "tensorboard>=2.15.0",
        "gradio>=5.17.0",
        "huggingface-hub>=0.19.0",
        "datasets>=2.15.0"
    }

    def _pkg_name(requirement):
        # Package name = text before the first version-specifier / extras /
        # marker character (PEP 508-ish, good enough for simple pin lines).
        for i, ch in enumerate(requirement):
            if ch in "<>=!~;[ ":
                return requirement[:i].strip().lower()
        return requirement.strip().lower()

    # Read existing requirements, keyed by normalized package name.
    merged = {}
    if req_path.exists():
        with open(req_path) as f:
            for line in f:
                line = line.strip()
                if line:
                    merged[_pkg_name(line)] = line

    # Required pins override any existing entry for the same package.
    for req in required_packages:
        merged[_pkg_name(req)] = req

    # Write updated requirements, sorted for stable diffs.
    with open(req_path, 'w') as f:
        for req in sorted(merged.values()):
            f.write(f"{req}\n")

    logger.info("Updated requirements.txt with necessary packages")
def create_space(username, space_name):
    """Get info for the Space "username/space_name", creating it if absent.

    Args:
        username: Hugging Face account or organization name.
        space_name: Name of the Space repository.

    Returns:
        The huggingface_hub space-info object for the Space.

    Raises:
        RuntimeError: If the Space can be neither fetched nor created.
    """
    # Build space_id BEFORE the try block: the outer except handler formats
    # it into its message, so if HfApi() raised before the assignment the
    # handler itself would raise NameError and mask the real error.
    space_id = f"{username}/{space_name}"
    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space; any failure here is treated as
        # "does not exist" and we fall through to creation.
        try:
            space_info = api.space_info(repo_id=space_id)
            logger.info(f"Space {space_id} already exists")
            return space_info
        except Exception:
            logger.info(f"Space {space_id} does not exist, creating new space...")

        # Create a new public Gradio Space.
        try:
            api.create_repo(
                repo_id=space_id,
                private=False,
                repo_type="space",
                space_sdk="gradio"
            )
            logger.info(f"Created new space: {space_id}")
            return api.space_info(repo_id=space_id)
        except Exception as e:
            logger.error(f"Failed to create space: {str(e)}")
            raise
    except Exception as e:
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}")
def main():
    """Entry point: verify configs, refresh requirements, and push to the Space.

    Returns:
        bool: True on success, False if the user cancels or an error occurs.
    """
    parser = argparse.ArgumentParser(description='Update Hugging Face Space for Phi-4 training')
    parser.add_argument('--space_name', type=str, help='Space name (default: from env)')
    parser.add_argument('--force', action='store_true', help='Skip confirmation')
    args = parser.parse_args()

    # Interactive guard: uploading restarts the Space, which kills any
    # training run in progress.
    if not args.force:
        banner = "!" * 80
        print("\n" + banner)
        print("WARNING: Updating the Space will INTERRUPT any ongoing training!")
        print("Make sure all checkpoints are saved before proceeding.")
        print(banner + "\n")
        if input("Type 'update' to confirm: ").lower() != 'update':
            logger.info("Update cancelled")
            return False

    try:
        env_vars = load_env_variables()
        logger.info(f"Environment variables loaded: USERNAME={env_vars['HF_USERNAME']}, SPACE_NAME={env_vars['HF_SPACE_NAME']}")

        verify_configs()
        logger.info("All configuration files verified successfully")

        update_requirements()
        logger.info("Requirements updated successfully")

        # The CLI flag wins over the environment for the target space name.
        space_name = args.space_name or env_vars["HF_SPACE_NAME"]
        logger.info(f"Using space name: {space_name}")

        logger.info("Logging in to Hugging Face...")
        login(token=env_vars["HF_TOKEN"])
        logger.info("Successfully logged in to Hugging Face")

        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        source_dir = Path(__file__).parent
        logger.info(f"Uploading files from {source_dir} to Space {env_vars['HF_USERNAME']}/{space_name}...")

        # Keep secrets and Python caches out of the Space repo.
        with open(source_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(source_dir),
            repo_id=f"{env_vars['HF_USERNAME']}/{space_name}",
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )
        logger.info(f"Files uploaded successfully")

        space_url = f"https://huggingface.co/spaces/{env_vars['HF_USERNAME']}/{space_name}"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True
    except Exception as e:
        logger.error(f"Error updating Space: {str(e)}")
        return False
if __name__ == "__main__":
    # Exit code 0 on success, 1 on failure/cancellation (usable from CI).
    success = main()
    sys.exit(0 if success else 1)