# AnomalyMachine-Demo / deploy_spaces.py
# Uploaded by mandipgoswami ("Upload folder using huggingface_hub").
# Commit 75c0e17 (verified).
"""Deploy the AnomalyMachine-50K demo to Hugging Face Spaces."""
import argparse
import os
import shutil
from pathlib import Path
from dotenv import load_dotenv
from huggingface_hub import HfApi, upload_folder
def ensure_hf_token(project_root: Path) -> str:
    """Load, sanitize and validate the Hugging Face access token.

    ``.env`` files are loaded, when present, from ``project_root`` and then
    from ``project_root / "spaces_demo"``. Both are loaded with
    ``override=True``, so values from the files replace variables already set
    in the process environment and the second file wins over the first. When
    the project-root file is missing, ``load_dotenv()`` is called without
    arguments as a fallback.

    Args:
        project_root: Directory expected to contain the project's ``.env``.

    Returns:
        The value of ``HF_TOKEN`` with surrounding whitespace and quote
        characters removed.

    Raises:
        EnvironmentError: If ``HF_TOKEN`` is unset or empty after loading.
        ValueError: If the cleaned token does not start with ``hf_``.
    """
    banner = "=" * 60

    # Try loading from project root .env file first
    env_file = project_root / ".env"
    if env_file.exists():
        print(f"Loading .env from: {env_file}")
        load_dotenv(env_file, override=True)
    else:
        # Fallback: let python-dotenv perform its default lookup
        print("Project root .env not found, trying current directory...")
        load_dotenv()

    # Also try spaces_demo/.env if it exists
    spaces_env = project_root / "spaces_demo" / ".env"
    if spaces_env.exists():
        print(f"Also loading .env from: {spaces_env}")
        load_dotenv(spaces_env, override=True)

    token = os.getenv("HF_TOKEN")
    if not token:
        env_file_mark = "✓" if env_file.exists() else "✗ (not found)"
        spaces_env_mark = "✓" if spaces_env.exists() else "✗ (not found)"
        print("\n" + banner)
        print("ERROR: HF_TOKEN not found!")
        print(banner)
        print("\nChecked locations:")
        print(f" 1. {env_file} {env_file_mark}")
        print(f" 2. {spaces_env} {spaces_env_mark}")
        # This branch is only reached when the variable is missing or empty.
        print(" 3. Environment variable: ✗ (not set)")
        print("\nTo fix this:")
        print("1. Create or edit .env file in the project root:")
        print(f" {env_file}")
        print("2. Add your Hugging Face token (must start with 'hf_'):")
        print(" HF_TOKEN=hf_your_actual_token_here")
        print("\nOr set it as an environment variable:")
        print(" $env:HF_TOKEN='hf_your_token' # PowerShell")
        print(" export HF_TOKEN='hf_your_token' # Bash")
        print("\nGet your token from: https://huggingface.co/settings/tokens")
        print(banner + "\n")
        raise EnvironmentError("HF_TOKEN is not set in environment or .env file.")

    # Remove any quotes or whitespace
    token = token.strip().strip('"').strip("'").strip()

    # Only a short prefix is echoed, and nothing at all for very short values,
    # so the log never contains a usable portion of the secret.
    token_preview = token[:4] + "..." if len(token) > 8 else "<hidden>"
    print(f"Token loaded: {token_preview} (length: {len(token)})")

    # Basic validation
    if not token.startswith("hf_"):
        print("\n" + banner)
        print("ERROR: Invalid token format!")
        print(banner)
        print(f"\nYour token starts with: '{token[:5]}...'")
        print("But it should start with: 'hf_'")
        print("\nCommon issues:")
        print("1. Token might have extra spaces or quotes")
        print("2. Token might be incomplete")
        print("3. You might be using an old token format")
        print("\nGet a new token from: https://huggingface.co/settings/tokens")
        print("Make sure to copy the FULL token (it should start with 'hf_')")
        print(banner + "\n")
        raise ValueError("HF_TOKEN must start with 'hf_'. Please check your token.")

    return token
def copy_example_audio_files(
    source_output_dir: Path, target_examples_dir: Path, num_per_machine: int = 1
) -> None:
    """Copy anomalous example clips for each machine type into the demo.

    Files are looked up in ``source_output_dir / "audio"`` using the pattern
    ``<machine>_*_anomalous_*.wav``. Matches are sorted by name before the
    first ``num_per_machine`` are taken, so repeated runs pick the same files
    regardless of the order in which the filesystem lists them.

    Args:
        source_output_dir: Dataset output directory containing ``audio/``.
        target_examples_dir: Destination directory; created if missing.
        num_per_machine: Maximum number of files copied per machine type.
            Zero or negative values copy nothing.

    Returns:
        None. Progress is reported on stdout. If the source audio directory
        does not exist a warning is printed and nothing is created.
    """
    source_audio_dir = source_output_dir / "audio"
    if not source_audio_dir.exists():
        print(f"Warning: Source audio directory not found at {source_audio_dir}")
        return

    target_examples_dir.mkdir(parents=True, exist_ok=True)

    machine_types = ("fan", "pump", "compressor", "conveyor_belt", "electric_motor", "valve")
    # A negative slice bound would mean "all but the last N"; treat it as zero.
    limit = max(num_per_machine, 0)
    copied_count = 0

    for machine in machine_types:
        # Sort so the selection is deterministic across platforms and runs.
        matching_files = sorted(source_audio_dir.glob(f"{machine}_*_anomalous_*.wav"))
        for audio_file in matching_files[:limit]:
            shutil.copy2(audio_file, target_examples_dir / audio_file.name)
            copied_count += 1
            print(f"Copied: {audio_file.name}")

    print(f"Copied {copied_count} example audio files to {target_examples_dir}")
def _strip_readme_emoji_field(readme_path: Path) -> None:
    """Rewrite ``readme_path`` without any line that starts with ``emoji:``."""
    print("Ensuring README.md has correct format...")
    with open(readme_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Remove emoji line if it exists (to avoid encoding issues)
    filtered_lines = []
    for line in content.split("\n"):
        if line.strip().startswith("emoji:"):
            print(f" Removing emoji line: {line.strip()}")
            continue
        filtered_lines.append(line)

    # Always written back with "\n" line endings, even when nothing was removed.
    with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
        f.write("\n".join(filtered_lines))
    print("README.md cleaned and saved with UTF-8 encoding.")


def deploy_space(
    spaces_demo_dir: Path,
    output_dir: Path,
    repo_id: str,
    token: str,
    private: bool = False,
) -> None:
    """Deploy the demo to Hugging Face Spaces.

    Steps, in order: verify the token with ``whoami``; create the Space when
    ``repo_info`` fails; copy example audio from ``output_dir`` into
    ``spaces_demo_dir / "examples"``; strip the ``emoji:`` line from the local
    ``README.md``; upload ``spaces_demo_dir``.

    Args:
        spaces_demo_dir: Local folder uploaded as the Space contents.
        output_dir: Generated dataset directory used as the source of example
            audio; skipped with a warning when it does not exist.
        repo_id: Target Space id in ``"owner/name"`` form.
        token: Hugging Face access token used for every API call.
        private: When true, a newly created Space is created as private.
            The visibility of a Space that already exists is not changed.

    Raises:
        Exception: Errors from ``whoami`` and ``create_repo`` are re-raised
            after troubleshooting hints are printed; errors from the upload
            propagate unchanged.
    """
    api = HfApi(token=token)

    # Verify token works by checking user info
    try:
        user_info = api.whoami(token=token)
        print(f"Authenticated as: {user_info.get('name', 'Unknown')}")
    except Exception as e:
        print(f"\n❌ Authentication failed: {e}")
        print("\nTroubleshooting:")
        print("1. Your HF_TOKEN may be invalid or expired")
        print("2. Get a new token at: https://huggingface.co/settings/tokens")
        print("3. Make sure the token has 'write' permissions")
        print("4. Update your .env file with the new token")
        raise

    # Check if repo exists. Any lookup failure falls through to creation;
    # exist_ok=True keeps that safe if the Space is in fact already there.
    try:
        api.repo_info(repo_id, repo_type="space", token=token)
        print(f"Space '{repo_id}' already exists. Updating...")
    except Exception:
        print(f"Creating new Space '{repo_id}'...")
        create_kwargs = {
            "repo_id": repo_id,
            "token": token,
            "repo_type": "space",
            "exist_ok": True,
            "space_sdk": "gradio",
        }
        if private:
            # Only sent when requested so the default call stays unchanged.
            create_kwargs["private"] = True
        try:
            api.create_repo(**create_kwargs)
            print("✅ Space created successfully!")
        except Exception as create_error:
            print(f"\n❌ Error creating Space: {create_error}")
            print("\nTroubleshooting:")
            print("1. Verify your HF_TOKEN is valid and not expired")
            print("2. Check that the token has 'write' permissions")
            print("3. Get a new token at: https://huggingface.co/settings/tokens")
            username = repo_id.split('/')[0]
            print(f"4. Make sure your username '{username}' is correct")
            print(f"5. Check if you have permission to create Spaces under '{username}'")
            raise

    # Copy example audio files from generated dataset
    examples_dir = spaces_demo_dir / "examples"
    if output_dir.exists():
        print("Copying example audio files from generated dataset...")
        copy_example_audio_files(output_dir, examples_dir, num_per_machine=1)
    else:
        print(f"Warning: Output directory not found at {output_dir}. Skipping example files.")

    # Ensure README.md doesn't have emoji field (fix encoding issues)
    readme_path = spaces_demo_dir / "README.md"
    if readme_path.exists():
        _strip_readme_emoji_field(readme_path)

    # Upload all files in spaces_demo directory (never the local .env)
    print(f"Uploading files from {spaces_demo_dir}...")
    upload_folder(
        folder_path=str(spaces_demo_dir),
        repo_id=repo_id,
        repo_type="space",
        token=token,
        ignore_patterns=[".git", "__pycache__", "*.pyc", ".env"],
    )

    space_url = f"https://huggingface.co/spaces/{repo_id}"
    print("\n✅ Space deployed successfully!")
    print(f"🌐 Live at: {space_url}")
def parse_args() -> argparse.Namespace:
    """Build the deployment CLI and parse the process arguments.

    Returns:
        Namespace with ``output_dir`` (str), ``repo_id`` (str) and
        ``private`` (bool, ``False`` unless ``--private`` is given).
    """
    cli = argparse.ArgumentParser(
        description="Deploy AnomalyMachine-50K demo to Hugging Face Spaces."
    )

    # (flag, default, help) for the plain string-valued options.
    string_options = (
        (
            "--output_dir",
            os.path.join("..", "output"),
            "Path to generated dataset output directory.",
        ),
        (
            "--repo_id",
            "mandipgoswami/AnomalyMachine-Demo",
            "Target Hugging Face Space repository id.",
        ),
    )
    for flag, default_value, help_text in string_options:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    cli.add_argument("--private", action="store_true", help="Create the Space as private.")
    return cli.parse_args()
def main() -> None:
    """Entry point for CLI execution.

    Parses the arguments, derives the demo directory (this script's folder)
    and the project root (its parent), resolves the dataset output directory,
    loads the token and runs the deployment.

    A relative ``--output_dir`` is resolved against the current working
    directory first. If that location does not exist, the same relative path
    is tried against the script directory and then the project root, so the
    default ``../output`` works no matter where the script is launched from.
    When none of the candidates exists, the working-directory result is kept
    and the deployment step reports the missing directory.
    """
    args = parse_args()

    # Determine paths
    script_dir = Path(__file__).resolve().parent
    spaces_demo_dir = script_dir
    project_root = script_dir.parent

    requested = Path(args.output_dir)
    # resolve() always yields an absolute path, so the fallback has to be
    # decided from the raw argument rather than from the resolved value.
    output_dir = requested.resolve()
    if not requested.is_absolute() and not output_dir.exists():
        for base in (script_dir, project_root):
            candidate = (base / requested).resolve()
            if candidate.exists():
                output_dir = candidate
                break

    token = ensure_hf_token(project_root)
    deploy_space(
        spaces_demo_dir=spaces_demo_dir,
        output_dir=output_dir,
        repo_id=args.repo_id,
        token=token,
        private=args.private,
    )


# Run the deployment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()