Spaces:

mandipgoswami
/

AnomalyMachine-Demo

Running

App Files Files Community

AnomalyMachine-Demo / deploy_spaces.py

mandipgoswami

Upload folder using huggingface_hub

75c0e17 verified 15 days ago

raw

history blame contribute delete

9.64 kB

	"""Deploy the AnomalyMachine-50K demo to Hugging Face Spaces."""

	import argparse
	import os
	import shutil
	from pathlib import Path

	from dotenv import load_dotenv
	from huggingface_hub import HfApi, upload_folder


	def ensure_hf_token(project_root: Path) -> str:
	    """Load, clean and validate the Hugging Face token.

	    ``.env`` files are loaded first: ``<project_root>/.env`` when it exists
	    (otherwise the default ``load_dotenv()`` lookup is used), followed by
	    ``<project_root>/spaces_demo/.env`` when that exists. Both explicit
	    files are loaded with ``override=True``, so their values replace
	    anything already in the environment. The token is then read from the
	    ``HF_TOKEN`` environment variable.

	    Args:
	        project_root: Directory expected to contain the project's ``.env``.

	    Returns:
	        The token with surrounding whitespace and quote characters removed.

	    Raises:
	        EnvironmentError: If ``HF_TOKEN`` is unset or empty.
	        ValueError: If the cleaned token does not start with ``hf_``.
	    """
	    env_file = project_root / ".env"
	    spaces_env = project_root / "spaces_demo" / ".env"

	    if env_file.exists():
	        print(f"Loading .env from: {env_file}")
	        load_dotenv(env_file, override=True)
	    else:
	        # No explicit file: let python-dotenv use its default lookup.
	        print("Project root .env not found, trying current directory...")
	        load_dotenv()

	    # The Space folder may carry its own .env; it wins over the root one.
	    if spaces_env.exists():
	        print(f"Also loading .env from: {spaces_env}")
	        load_dotenv(spaces_env, override=True)

	    token = os.getenv("HF_TOKEN")

	    if not token:
	        banner = "=" * 60
	        print("\n" + banner)
	        print("ERROR: HF_TOKEN not found!")
	        print(banner)
	        print("\nChecked locations:")
	        print(f" 1. {env_file} {'✓' if env_file.exists() else '✗ (not found)'}")
	        print(f" 2. {spaces_env} {'✓' if spaces_env.exists() else '✗ (not found)'}")
	        # We only get here when the variable is missing or empty.
	        print(" 3. Environment variable: ✗ (not set)")
	        print("\nTo fix this:")
	        print("1. Create or edit .env file in the project root:")
	        print(f" {env_file}")
	        print("2. Add your Hugging Face token (must start with 'hf_'):")
	        print(" HF_TOKEN=hf_your_actual_token_here")
	        print("\nOr set it as an environment variable:")
	        print(" $env:HF_TOKEN='hf_your_token' # PowerShell")
	        print(" export HF_TOKEN='hf_your_token' # Bash")
	        print("\nGet your token from: https://huggingface.co/settings/tokens")
	        print(banner + "\n")
	        raise EnvironmentError("HF_TOKEN is not set in environment or .env file.")

	    # Values copied into .env files often carry stray quotes or whitespace.
	    token = token.strip().strip('"').strip("'").strip()

	    # Debug preview only. A token of 10 characters or fewer would be shown
	    # in full by a plain prefix slice, so mask it completely instead.
	    if len(token) > 10:
	        token_preview = token[:10] + "..."
	    else:
	        token_preview = "*" * len(token)
	    print(f"Token loaded: {token_preview} (length: {len(token)})")

	    # Basic format validation.
	    if not token.startswith("hf_"):
	        banner = "=" * 60
	        print("\n" + banner)
	        print("ERROR: Invalid token format!")
	        print(banner)
	        print(f"\nYour token starts with: '{token[:5]}...'")
	        print("But it should start with: 'hf_'")
	        print("\nCommon issues:")
	        print("1. Token might have extra spaces or quotes")
	        print("2. Token might be incomplete")
	        print("3. You might be using an old token format")
	        print("\nGet a new token from: https://huggingface.co/settings/tokens")
	        print("Make sure to copy the FULL token (it should start with 'hf_')")
	        print(banner + "\n")
	        raise ValueError("HF_TOKEN must start with 'hf_'. Please check your token.")

	    return token


	def copy_example_audio_files(
	    source_output_dir: Path, target_examples_dir: Path, num_per_machine: int = 1
	) -> None:
	    """Copy a few anomalous example clips per machine type.

	    Searches ``<source_output_dir>/audio`` (non-recursively) for ``.wav``
	    files named ``<machine>_*_anomalous_*.wav`` and copies up to
	    ``num_per_machine`` of them per machine type, in sorted filename order,
	    into ``target_examples_dir``. Progress is reported with ``print``.

	    Args:
	        source_output_dir: Dataset output directory containing ``audio/``.
	        target_examples_dir: Destination directory; created if missing.
	        num_per_machine: Maximum files copied per machine type. Values
	            below zero are treated as zero.

	    Returns:
	        None. If the ``audio`` directory is missing, a warning is printed
	        and nothing is created or copied.
	    """
	    source_audio_dir = source_output_dir / "audio"
	    if not source_audio_dir.exists():
	        print(f"Warning: Source audio directory not found at {source_audio_dir}")
	        return

	    target_examples_dir.mkdir(parents=True, exist_ok=True)

	    machine_types = ["fan", "pump", "compressor", "conveyor_belt", "electric_motor", "valve"]
	    # A negative limit would slice from the end of the list; clamp it.
	    limit = max(num_per_machine, 0)

	    copied_count = 0
	    for machine in machine_types:
	        # NOTE(review): the wildcard positions assume file names shaped like
	        # "<machine>_<id>_anomalous_<detail>.wav" - confirm against the
	        # generator. The pattern still matches the literal wildcard-free name.
	        pattern = f"{machine}_*_anomalous_*.wav"
	        # glob() yields entries in arbitrary order; sort so the same
	        # examples are picked on every run.
	        matching_files = sorted(source_audio_dir.glob(pattern))

	        for audio_file in matching_files[:limit]:
	            dest_path = target_examples_dir / audio_file.name
	            shutil.copy2(audio_file, dest_path)
	            copied_count += 1
	            print(f"Copied: {audio_file.name}")

	    print(f"Copied {copied_count} example audio files to {target_examples_dir}")


	def deploy_space(
	    spaces_demo_dir: Path,
	    output_dir: Path,
	    repo_id: str,
	    token: str,
	    private: bool = False,
	) -> None:
	    """Deploy the demo to Hugging Face Spaces.

	    Steps, in order: verify the token with ``whoami``; create the Space
	    when ``repo_info`` fails; copy example audio from ``output_dir`` into
	    ``spaces_demo_dir / "examples"``; drop ``emoji:`` lines from
	    ``README.md``; upload the whole ``spaces_demo_dir`` folder.

	    Args:
	        spaces_demo_dir: Local folder whose contents are uploaded.
	        output_dir: Generated dataset directory used as the source of
	            example audio; skipped with a warning when it does not exist.
	        repo_id: Target Space id, e.g. ``"user/space-name"``.
	        token: Hugging Face access token used for every API call.
	        private: Visibility forwarded to ``create_repo``. It is only sent
	            when this call creates the Space; the branch for an existing
	            Space makes no visibility change.

	    Raises:
	        Exception: Whatever ``whoami`` or ``create_repo`` raise is re-raised
	            after troubleshooting hints are printed; upload errors
	            propagate unchanged.
	    """
	    api = HfApi(token=token)

	    # Fail fast with actionable hints if the token is rejected.
	    try:
	        user_info = api.whoami(token=token)
	        print(f"Authenticated as: {user_info.get('name', 'Unknown')}")
	    except Exception as e:
	        print(f"\n❌ Authentication failed: {e}")
	        print("\nTroubleshooting:")
	        print("1. Your HF_TOKEN may be invalid or expired")
	        print("2. Get a new token at: https://huggingface.co/settings/tokens")
	        print("3. Make sure the token has 'write' permissions")
	        print("4. Update your .env file with the new token")
	        raise

	    # Any repo_info failure is treated as "Space does not exist yet";
	    # create_repo(exist_ok=True) keeps that safe if the lookup failed for
	    # another reason.
	    try:
	        api.repo_info(repo_id, repo_type="space", token=token)
	        print(f"Space '{repo_id}' already exists. Updating...")
	    except Exception:
	        print(f"Creating new Space '{repo_id}'...")
	        try:
	            api.create_repo(
	                repo_id=repo_id,
	                token=token,
	                repo_type="space",
	                private=private,  # previously dropped, so --private was a no-op
	                exist_ok=True,
	                space_sdk="gradio",
	            )
	            print("✅ Space created successfully!")
	        except Exception as create_error:
	            print(f"\n❌ Error creating Space: {create_error}")
	            print("\nTroubleshooting:")
	            print("1. Verify your HF_TOKEN is valid and not expired")
	            print("2. Check that the token has 'write' permissions")
	            print("3. Get a new token at: https://huggingface.co/settings/tokens")
	            username = repo_id.split('/')[0]
	            print(f"4. Make sure your username '{username}' is correct")
	            print(f"5. Check if you have permission to create Spaces under '{username}'")
	            raise

	    # Copy example audio files from the generated dataset.
	    examples_dir = spaces_demo_dir / "examples"
	    if output_dir.exists():
	        print("Copying example audio files from generated dataset...")
	        copy_example_audio_files(output_dir, examples_dir, num_per_machine=1)
	    else:
	        print(f"Warning: Output directory not found at {output_dir}. Skipping example files.")

	    # Strip any "emoji:" line from README.md (to avoid encoding issues) and
	    # rewrite the file as UTF-8 with LF line endings.
	    readme_path = spaces_demo_dir / "README.md"
	    if readme_path.exists():
	        print("Ensuring README.md has correct format...")
	        with open(readme_path, "r", encoding="utf-8") as f:
	            content = f.read()

	        filtered_lines = []
	        for line in content.split("\n"):
	            if line.strip().startswith("emoji:"):
	                print(f" Removing emoji line: {line.strip()}")
	                continue
	            filtered_lines.append(line)

	        content_fixed = "\n".join(filtered_lines)
	        with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
	            f.write(content_fixed)
	        print("README.md cleaned and saved with UTF-8 encoding.")

	    # Upload everything in the Space folder except VCS, cache and secret files.
	    print(f"Uploading files from {spaces_demo_dir}...")
	    upload_folder(
	        folder_path=str(spaces_demo_dir),
	        repo_id=repo_id,
	        repo_type="space",
	        token=token,
	        ignore_patterns=[".git", "__pycache__", "*.pyc", ".env"],
	    )

	    space_url = f"https://huggingface.co/spaces/{repo_id}"
	    print("\n✅ Space deployed successfully!")
	    print(f"🌐 Live at: {space_url}")


	def parse_args() -> argparse.Namespace:
	    """Build the command-line parser and parse ``sys.argv``.

	    Returns:
	        Namespace with ``output_dir`` (str), ``repo_id`` (str) and
	        ``private`` (bool, ``False`` unless ``--private`` is given).
	    """
	    cli = argparse.ArgumentParser(
	        description="Deploy AnomalyMachine-50K demo to Hugging Face Spaces."
	    )

	    # String-valued options as (flag, default, help), registered in order.
	    string_options = (
	        (
	            "--output_dir",
	            os.path.join("..", "output"),
	            "Path to generated dataset output directory.",
	        ),
	        (
	            "--repo_id",
	            "mandipgoswami/AnomalyMachine-Demo",
	            "Target Hugging Face Space repository id.",
	        ),
	    )
	    for flag, default_value, help_text in string_options:
	        cli.add_argument(flag, type=str, default=default_value, help=help_text)

	    cli.add_argument(
	        "--private",
	        action="store_true",
	        help="Create the Space as private.",
	    )

	    parsed = cli.parse_args()
	    return parsed


	def main() -> None:
	    """Entry point for CLI execution.

	    Parses the command line, derives the Space folder (this script's
	    directory) and the project root (its parent), loads the Hugging Face
	    token and runs the deployment.

	    A relative ``--output_dir`` is resolved against the current working
	    directory, not against the project root.
	    """
	    args = parse_args()

	    # This script lives in the Space folder; the project root is its parent.
	    spaces_demo_dir = Path(__file__).resolve().parent
	    project_root = spaces_demo_dir.parent

	    # resolve() always returns an absolute path (relative input is anchored
	    # at the CWD), so the former "if not absolute" fallback could never run
	    # and has been removed; effective behaviour is unchanged.
	    output_dir = Path(args.output_dir).resolve()

	    token = ensure_hf_token(project_root)

	    deploy_space(
	        spaces_demo_dir=spaces_demo_dir,
	        output_dir=output_dir,
	        repo_id=args.repo_id,
	        token=token,
	        private=args.private,
	    )


	# Run the deployment only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	main()