"""Deploy the AnomalyMachine-50K demo to Hugging Face Spaces.""" import argparse import os import shutil from pathlib import Path from dotenv import load_dotenv from huggingface_hub import HfApi, upload_folder def ensure_hf_token(project_root: Path) -> str: """Load the Hugging Face token from environment variables.""" # Try loading from project root .env file first env_file = project_root / ".env" env_loaded = False if env_file.exists(): print(f"Loading .env from: {env_file}") load_dotenv(env_file, override=True) env_loaded = True else: # Fallback: try current directory print("Project root .env not found, trying current directory...") load_dotenv() # Also try spaces_demo/.env if it exists spaces_env = project_root / "spaces_demo" / ".env" if spaces_env.exists(): print(f"Also loading .env from: {spaces_env}") load_dotenv(spaces_env, override=True) # Check environment variable token = os.getenv("HF_TOKEN") if not token: print("\n" + "="*60) print("ERROR: HF_TOKEN not found!") print("="*60) print(f"\nChecked locations:") print(f" 1. {env_file} {'✓' if env_file.exists() else '✗ (not found)'}") print(f" 2. {spaces_env} {'✓' if spaces_env.exists() else '✗ (not found)'}") print(f" 3. Environment variable: {'✓' if os.getenv('HF_TOKEN') else '✗ (not set)'}") print("\nTo fix this:") print("1. Create or edit .env file in the project root:") print(f" {env_file}") print("2. Add your Hugging Face token (must start with 'hf_'):") print(" HF_TOKEN=hf_your_actual_token_here") print("\nOr set it as an environment variable:") print(" $env:HF_TOKEN='hf_your_token' # PowerShell") print(" export HF_TOKEN='hf_your_token' # Bash") print("\nGet your token from: https://huggingface.co/settings/tokens") print("="*60 + "\n") raise EnvironmentError("HF_TOKEN is not set in environment or .env file.") # Remove any quotes or whitespace token = token.strip().strip('"').strip("'").strip() # Show first few characters for debugging (without exposing full token) token_preview = token[:10] + "..." if len(token) > 10 else token[:len(token)] print(f"Token loaded: {token_preview} (length: {len(token)})") # Basic validation if not token.startswith("hf_"): print("\n" + "="*60) print("ERROR: Invalid token format!") print("="*60) print(f"\nYour token starts with: '{token[:5]}...'") print("But it should start with: 'hf_'") print("\nCommon issues:") print("1. Token might have extra spaces or quotes") print("2. Token might be incomplete") print("3. You might be using an old token format") print("\nGet a new token from: https://huggingface.co/settings/tokens") print("Make sure to copy the FULL token (it should start with 'hf_')") print("="*60 + "\n") raise ValueError("HF_TOKEN must start with 'hf_'. Please check your token.") return token def copy_example_audio_files( source_output_dir: Path, target_examples_dir: Path, num_per_machine: int = 1 ) -> None: """Copy example audio files from generated dataset to examples directory.""" source_audio_dir = source_output_dir / "audio" if not source_audio_dir.exists(): print(f"Warning: Source audio directory not found at {source_audio_dir}") return target_examples_dir.mkdir(parents=True, exist_ok=True) machine_types = ["fan", "pump", "compressor", "conveyor_belt", "electric_motor", "valve"] copied_count = 0 for machine in machine_types: # Find anomalous examples for this machine pattern = f"{machine}_*_anomalous_*.wav" matching_files = list(source_audio_dir.glob(pattern)) if matching_files: # Copy first N anomalous examples for audio_file in matching_files[:num_per_machine]: dest_path = target_examples_dir / audio_file.name shutil.copy2(audio_file, dest_path) copied_count += 1 print(f"Copied: {audio_file.name}") print(f"Copied {copied_count} example audio files to {target_examples_dir}") def deploy_space( spaces_demo_dir: Path, output_dir: Path, repo_id: str, token: str, private: bool = False, ) -> None: """Deploy the demo to Hugging Face Spaces.""" api = HfApi(token=token) # Verify token works by checking user info try: user_info = api.whoami(token=token) print(f"Authenticated as: {user_info.get('name', 'Unknown')}") except Exception as e: print(f"\n❌ Authentication failed: {e}") print("\nTroubleshooting:") print("1. Your HF_TOKEN may be invalid or expired") print("2. Get a new token at: https://huggingface.co/settings/tokens") print("3. Make sure the token has 'write' permissions") print("4. Update your .env file with the new token") raise # Check if repo exists try: api.repo_info(repo_id, repo_type="space", token=token) print(f"Space '{repo_id}' already exists. Updating...") except Exception: print(f"Creating new Space '{repo_id}'...") try: api.create_repo( repo_id=repo_id, token=token, repo_type="space", exist_ok=True, space_sdk="gradio", ) print(f"✅ Space created successfully!") except Exception as create_error: print(f"\n❌ Error creating Space: {create_error}") print("\nTroubleshooting:") print("1. Verify your HF_TOKEN is valid and not expired") print("2. Check that the token has 'write' permissions") print("3. Get a new token at: https://huggingface.co/settings/tokens") username = repo_id.split('/')[0] print(f"4. Make sure your username '{username}' is correct") print(f"5. Check if you have permission to create Spaces under '{username}'") raise # Copy example audio files from generated dataset examples_dir = spaces_demo_dir / "examples" if output_dir.exists(): print("Copying example audio files from generated dataset...") copy_example_audio_files(output_dir, examples_dir, num_per_machine=1) else: print(f"Warning: Output directory not found at {output_dir}. Skipping example files.") # Ensure README.md doesn't have emoji field (fix encoding issues) readme_path = spaces_demo_dir / "README.md" if readme_path.exists(): print("Ensuring README.md has correct format...") with open(readme_path, "r", encoding="utf-8") as f: content = f.read() # Remove emoji line if it exists (to avoid encoding issues) lines = content.split("\n") filtered_lines = [] for line in lines: if line.strip().startswith("emoji:"): print(f" Removing emoji line: {line.strip()}") continue filtered_lines.append(line) # Write back without emoji content_fixed = "\n".join(filtered_lines) with open(readme_path, "w", encoding="utf-8", newline="\n") as f: f.write(content_fixed) print("README.md cleaned and saved with UTF-8 encoding.") # Upload all files in spaces_demo directory print(f"Uploading files from {spaces_demo_dir}...") upload_folder( folder_path=str(spaces_demo_dir), repo_id=repo_id, repo_type="space", token=token, ignore_patterns=[".git", "__pycache__", "*.pyc", ".env"], ) space_url = f"https://huggingface.co/spaces/{repo_id}" print(f"\n✅ Space deployed successfully!") print(f"🌐 Live at: {space_url}") def parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Deploy AnomalyMachine-50K demo to Hugging Face Spaces." ) parser.add_argument( "--output_dir", type=str, default=os.path.join("..", "output"), help="Path to generated dataset output directory.", ) parser.add_argument( "--repo_id", type=str, default="mandipgoswami/AnomalyMachine-Demo", help="Target Hugging Face Space repository id.", ) parser.add_argument( "--private", action="store_true", help="Create the Space as private.", ) return parser.parse_args() def main() -> None: """Entry point for CLI execution.""" args = parse_args() # Determine paths script_dir = Path(__file__).resolve().parent spaces_demo_dir = script_dir project_root = script_dir.parent output_dir = Path(args.output_dir).resolve() if not output_dir.is_absolute(): output_dir = project_root / args.output_dir token = ensure_hf_token(project_root) deploy_space( spaces_demo_dir=spaces_demo_dir, output_dir=output_dir, repo_id=args.repo_id, token=token, private=args.private, ) if __name__ == "__main__": main()