Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Quick script to update your Hugging Face Space for phi-4-unsloth-bnb-4bit training. | |
| This script handles the specific requirements for the 4-bit quantized Phi-4 model training, | |
| including proper configuration and dependency management. | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import subprocess | |
| import argparse | |
| import logging | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, login | |
| import getpass | |
# Configure logging
# Log to stdout (not stderr) so messages show up, in order, in the Space
# build/run logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)
def load_env_variables():
    """Load configuration from a local .env file and the process environment.

    Returns:
        dict: ``HF_TOKEN``, ``HF_USERNAME`` and ``HF_SPACE_NAME``.  The space
        name is always forced to ``"phi4training"``; the username defaults to
        ``"George-API"`` but is overridden by ``SPACE_ID`` when running
        inside a Hugging Face Space.
    """
    # Define default values that should be used
    required_vars = {
        "HF_USERNAME": os.environ.get("HF_USERNAME", "George-API"),
        "HF_SPACE_NAME": "phi4training"  # Hardcode the correct space name
    }

    # First try to load from local .env file.  Use load_dotenv() instead of a
    # hand-rolled parser: the old manual loop crashed with ValueError on any
    # line without '=' and never used the imported function at all.
    try:
        from dotenv import load_dotenv
        env_path = Path(__file__).parent / ".env"
        if env_path.exists():
            # override=True mirrors the previous behaviour of unconditionally
            # writing parsed values into os.environ.
            load_dotenv(dotenv_path=env_path, override=True)
            logger.info(f"Loaded environment variables from {env_path}")
        else:
            logger.warning(f"No .env file found at {env_path}")
    except ImportError:
        logger.warning("python-dotenv not installed, skipping .env loading")

    # Check if we're running in a Hugging Face Space; if so, derive the
    # username from SPACE_ID ("<username>/<space>").
    if os.environ.get("SPACE_ID"):
        logger.info("Running in Hugging Face Space")
        space_id = os.environ.get("SPACE_ID", "")
        if "/" in space_id:
            username = space_id.split("/")[0]
            os.environ["HF_USERNAME"] = username
            logger.info(f"Set HF_USERNAME from SPACE_ID: {username}")

    # Always ensure we have the required variables
    # And override HF_SPACE_NAME to ensure we use phi4training
    result = {
        "HF_TOKEN": os.environ.get("HF_TOKEN", ""),
        "HF_USERNAME": os.environ.get("HF_USERNAME", required_vars["HF_USERNAME"]),
        "HF_SPACE_NAME": required_vars["HF_SPACE_NAME"]  # Always use phi4training
    }

    # Ensure the space name is set correctly in environment
    os.environ["HF_SPACE_NAME"] = required_vars["HF_SPACE_NAME"]

    logger.info(f"Using environment variables: USERNAME={result['HF_USERNAME']}, SPACE_NAME={result['HF_SPACE_NAME']}")
    return result
def verify_configs():
    """Check that every required config file exists and that JSON files parse.

    Raises:
        FileNotFoundError: if any required file is absent.
        ValueError: if a required JSON file does not parse.
    """
    base_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "requirements.txt",
        "run_transformers_training.py"
    ]

    # Collect every absent file so the error reports them all at once.
    missing_files = [name for name in required_files if not (base_dir / name).exists()]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Verify JSON configs by attempting a full parse of each one.
    for json_file in (name for name in required_files if name.endswith('.json')):
        try:
            with open(base_dir / json_file) as f:
                json.load(f)
            logger.info(f"Verified {json_file} is valid JSON")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}")
def update_requirements():
    """Update consolidated requirements.txt with all necessary packages in the correct order.

    Rewrites ``requirements.txt`` with a base section (installed first), a
    main section, and a commented-out optional section, then removes the
    obsolete split requirements files.

    Returns:
        bool: always True on success.
    """
    logger.info("Setting up consolidated requirements file...")
    current_dir = Path(__file__).parent
    req_path = current_dir / "requirements.txt"

    # Base requirements (install first).  Two explicit lists replace the old
    # single list with a fragile "insert section header at index 7" check,
    # which silently misplaced the header whenever the base list changed.
    base_packages = [
        "torch>=2.0.0",
        "accelerate>=0.27.0",
        "bitsandbytes>=0.41.0",
        "transformers>=4.36.0",
        "datasets>=2.15.0",
        "huggingface-hub>=0.19.0",
        "tensorboard>=2.15.0",
    ]

    # Main requirements (install second)
    main_packages = [
        "einops>=0.7.0",
        "filelock>=3.13.1",
        "gradio>=5.17.0",
        "matplotlib>=3.7.0",
        "numpy>=1.24.0",
        "packaging>=23.0",
        "peft>=0.9.0",
        "psutil>=5.9.0",
        "python-dotenv>=1.0.0",
        "pyyaml>=6.0.1",
        "regex>=2023.0.0",
        "requests>=2.31.0",
        "safetensors>=0.4.1",
        "sentencepiece>=0.1.99",
        "tqdm>=4.65.0",
        "typing-extensions>=4.8.0",
        "unsloth>=2024.3",
    ]

    # Optional packages (written commented out so they are not installed
    # by default)
    optional_packages = [
        "flash-attn==2.5.2"
    ]

    # Create consolidated requirements file
    with open(req_path, 'w') as f:
        f.write("# BASE REQUIREMENTS - Install these critical dependencies first\n")
        f.write("# ---------------------------------------------------------------------\n")
        for req in base_packages:
            f.write(f"{req}\n")

        f.write("\n# MAIN REQUIREMENTS - Install these after base dependencies\n")
        f.write("# ---------------------------------------------------------------------\n")
        for req in main_packages:
            f.write(f"{req}\n")

        # Add optional dependencies section
        f.write("\n# OPTIONAL DEPENDENCIES - Install these last (if needed)\n")
        f.write("# ---------------------------------------------------------------------\n")
        for opt_pkg in optional_packages:
            f.write(f"# {opt_pkg}\n")

    logger.info(f"Updated consolidated requirements file at {req_path}")
    logger.info("Requirements are ordered for proper dependency installation")

    # Remove old requirements files if they exist
    for old_file in ("requirements-base.txt", "requirements-flash.txt"):
        old_path = current_dir / old_file
        if old_path.exists():
            old_path.unlink()
            logger.info(f"Removed old requirements file: {old_file}")

    return True
def create_space(username, space_name):
    """Create (or fetch) the target Hugging Face Space and return its info.

    Args:
        username: ignored — the script always targets ``George-API``.
        space_name: ignored — the script always targets ``phi4training``.

    Returns:
        Space info object for ``George-API/phi4training``.

    Raises:
        RuntimeError: if the Space can neither be accessed nor created.
    """
    # Override with the correct values regardless of what's passed
    username = "George-API"
    space_name = "phi4training"

    # Build the repo id BEFORE the try block: the except handler below
    # references space_id, and the old code assigned it after HfApi(), so an
    # HfApi() failure raised NameError instead of the real error.
    space_id = f"{username}/{space_name}"
    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
            logger.info(f"Space {space_id} already exists")
            return space_info
        except Exception:
            logger.info(f"Space {space_id} does not exist, creating new space...")

        # Create new space
        api.create_repo(
            repo_id=space_id,
            private=False,
            repo_type="space",
            space_sdk="gradio"
        )
        logger.info(f"Created new space: {space_id}")
        return api.space_info(repo_id=space_id)
    except Exception as e:
        logger.error(f"Failed to create space: {str(e)}")
        # Don't proceed if we can't create/access the space
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}")
def main():
    """Main function to update the Space.

    Parses CLI flags, validates config files and credentials, optionally asks
    for interactive confirmation, then uploads this directory to the
    hardcoded Space George-API/phi4training.

    Returns:
        bool: True on success, False on any handled failure.
    """
    try:
        # Parse command line arguments
        parser = argparse.ArgumentParser(description='Update Hugging Face Space for Phi-4 training')
        parser.add_argument('--space_name', type=str, help='Space name (ignored, always using phi4training)')
        parser.add_argument('--force', action='store_true', help='Skip confirmation when updating Space')
        args = parser.parse_args()

        # Load environment variables
        env_vars = load_env_variables()
        verify_configs()

        # Verify we have the necessary variables
        if not env_vars["HF_TOKEN"]:
            logger.error("Missing HF_TOKEN. Please set it in your .env file or environment variables.")
            return False

        logger.info(f"Environment variables loaded: USERNAME={env_vars['HF_USERNAME']}, SPACE_NAME={env_vars['HF_SPACE_NAME']}")

        # Ask for confirmation unless forced
        if not args.force:
            print("\nWARNING: Updating the Space will INTERRUPT any ongoing training!")
            confirm = input("Are you sure you want to update the Space? Type 'yes' to confirm: ")
            if confirm.lower() != 'yes':
                logger.info("Update cancelled by user")
                return False

            # Additional password check for safety
            # NOTE(review): any non-empty input passes — this only guards
            # against an accidental bare Enter, not an actual password check.
            password = getpass.getpass("Enter your password to confirm update: ")
            if password.strip() == "":
                logger.info("No password entered. Update cancelled.")
                return False
        else:
            logger.info("Skipping confirmation due to --force flag")

        # Update requirements
        update_requirements()
        logger.info("Requirements updated successfully")

        # Always use phi4training as the space name regardless of arguments
        space_name = "phi4training"
        logger.info(f"Using space name: {space_name}")

        # Verify we're using the expected Space; only the username can differ
        # at this point (space_name is hardcoded above).
        expected_space = "George-API/phi4training"
        actual_space = f"{env_vars['HF_USERNAME']}/{space_name}"

        if actual_space != expected_space:
            logger.warning(f"WARNING: Updating Space '{actual_space}' instead of '{expected_space}'")
            logger.warning("Make sure the HF_USERNAME environment variable is set to 'George-API'")

            # Safety check for non-force updates
            if not args.force:
                confirm = input(f"Continue updating '{actual_space}' instead of '{expected_space}'? (yes/no): ")
                if confirm.lower() != "yes":
                    logger.info("Update cancelled by user")
                    return False
        else:
            logger.info(f"Confirmed using the expected Space: {expected_space}")

        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        try:
            login(token=env_vars["HF_TOKEN"])
            logger.info("Successfully logged in to Hugging Face")

            # Verify login with whoami — login() alone does not prove the
            # token is valid/unexpired.
            api = HfApi()
            try:
                user_info = api.whoami()
                logger.info(f"Authenticated as: {user_info['name']}")
            except Exception as e:
                logger.error(f"Authentication verification failed: {str(e)}")
                logger.error("Your HF_TOKEN may be invalid or expired.")
                return False
        except Exception as e:
            logger.error(f"Login failed: {str(e)}")
            logger.error("Make sure your HF_TOKEN is valid and not expired.")
            return False

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space George-API/phi4training...")

        # Create .gitignore so secrets and caches never reach the repo
        with open(current_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id="George-API/phi4training",  # Hardcoded repo ID
            repo_type="space",
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )
        logger.info(f"Files uploaded successfully")

        space_url = "https://huggingface.co/spaces/George-API/phi4training"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True
    except Exception as e:
        logger.error(f"Error updating Space: {str(e)}")
        return False
| if __name__ == "__main__": | |
| success = main() | |
| sys.exit(0 if success else 1) |