Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Runtime artifact fetcher for Dressify. | |
| Downloads pre-processed artifacts from Hugging Face Hub to avoid reprocessing. | |
| """ | |
| import os | |
| import json | |
| import shutil | |
| import tarfile | |
| import zipfile | |
| from pathlib import Path | |
| from typing import Dict, List, Any, Optional | |
| from huggingface_hub import hf_hub_download, snapshot_download | |
class RuntimeArtifactFetcher:
    """Fetches artifacts from HF Hub at runtime to avoid reprocessing.

    Three artifact groups are managed: dataset splits, trained model
    checkpoints and metadata files. Each group has a list of *required*
    files that decides whether the group counts as locally available, plus
    (for splits and models) extra files that are downloaded on a
    best-effort basis.
    """

    # Files downloaded for each group, in download order.
    SPLIT_FILES = (
        "train.json", "valid.json", "test.json",
        "outfits_train.json", "outfits_valid.json", "outfits_test.json",
        "outfit_triplets_train.json", "outfit_triplets_valid.json",
        "outfit_triplets_test.json",
    )
    MODEL_FILES = (
        "resnet_item_embedder_best.pth",
        "vit_outfit_model_best.pth",
        "resnet_metrics.json",
        "vit_metrics.json",
    )
    METADATA_FILES = (
        "polyvore_item_metadata.json",
        "polyvore_outfit_titles.json",
        "categories.csv",
    )

    # Files that must exist locally for a group to be considered complete.
    REQUIRED_SPLIT_FILES = (
        "train.json", "valid.json", "test.json",
        "outfit_triplets_train.json", "outfit_triplets_valid.json",
        "outfit_triplets_test.json",
    )
    REQUIRED_MODEL_FILES = (
        "resnet_item_embedder_best.pth",
        "vit_outfit_model_best.pth",
    )
    REQUIRED_METADATA_FILES = METADATA_FILES

    # Rough download-size estimates (MB) reported by check_artifacts_needed().
    ESTIMATED_SIZE_MB = {"splits": 50, "models": 200, "metadata": 100}

    # Repository that hosts the packages used by download_and_extract_package().
    DEFAULT_PACKAGE_REPO = "Stylique/Dressify-Helper"

    def __init__(self, base_dir: str = "/home/user/app"):
        """Set up local directories and default HF repositories.

        Args:
            base_dir: Root directory under which ``data/Polyvore`` lives.
        """
        self.base_dir = base_dir
        self.data_dir = os.path.join(base_dir, "data/Polyvore")
        self.splits_dir = os.path.join(self.data_dir, "splits")
        # NOTE: taken verbatim from EXPORT_DIR; the default is a relative
        # path, so it is resolved against the current working directory,
        # not against base_dir.
        self.export_dir = os.getenv("EXPORT_DIR", "models/exports")
        # Default HF repositories, keyed by artifact group.
        self.default_repos = {
            "splits": "Stylique/Dressify-Helper",
            "models": "Stylique/dressify-models",
            "metadata": "Stylique/Dressify-Helper",
        }

    # ------------------------------------------------------------------
    # Availability checks
    # ------------------------------------------------------------------
    @staticmethod
    def _missing_files(directory: str, filenames) -> List[str]:
        """Return the names from *filenames* that do not exist in *directory*."""
        return [
            name for name in filenames
            if not os.path.exists(os.path.join(directory, name))
        ]

    def check_artifacts_needed(self) -> Dict[str, Any]:
        """Check what artifacts need to be fetched.

        Returns:
            A dict with boolean ``splits``/``models``/``metadata`` flags
            (True means "must be fetched") and ``total_size_mb``, the sum of
            the size estimates of the groups that are needed.
        """
        # A missing directory implies missing files, so the per-file check
        # alone covers the "directory does not exist" case.
        needs: Dict[str, Any] = {
            "splits": not self._has_complete_splits(),
            "models": not self._has_trained_models(),
            "metadata": not self._has_complete_metadata(),
        }
        needs["total_size_mb"] = sum(
            size for group, size in self.ESTIMATED_SIZE_MB.items() if needs[group]
        )
        return needs

    def _has_complete_splits(self) -> bool:
        """Check if complete splits are available."""
        return not self._missing_files(self.splits_dir, self.REQUIRED_SPLIT_FILES)

    def _has_trained_models(self) -> bool:
        """Check if trained models are available."""
        return not self._missing_files(self.export_dir, self.REQUIRED_MODEL_FILES)

    def _has_complete_metadata(self) -> bool:
        """Check if complete metadata is available."""
        return not self._missing_files(self.data_dir, self.REQUIRED_METADATA_FILES)

    # ------------------------------------------------------------------
    # Download helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _place_file(downloaded_path: str, dest_dir: str) -> str:
        """Ensure a downloaded file sits directly inside *dest_dir*.

        ``hf_hub_download(..., local_dir=...)`` mirrors the repository's
        folder structure below ``local_dir``, so ``splits/train.json`` is
        written to ``<local_dir>/splits/train.json``. The availability
        checks of this class look for the files directly in the target
        directory, hence the file is copied there when the paths differ.

        Returns:
            The path of the file inside *dest_dir*.
        """
        target = os.path.join(dest_dir, os.path.basename(downloaded_path))
        if os.path.abspath(downloaded_path) == os.path.abspath(target):
            return target
        # copy2 follows symlinks, so this also works when the hub client
        # hands back a link into its cache instead of a regular file.
        shutil.copy2(downloaded_path, target)
        try:
            os.remove(downloaded_path)
        except OSError:
            # A leftover nested duplicate is harmless; the flat copy exists.
            pass
        return target

    def _fetch_group(self, label: str, repo: Optional[str], filenames, required,
                     dest_dir: str, token: Optional[str], repo_subdir: str = "") -> bool:
        """Download one artifact group and verify its required files.

        Args:
            label: Group name (``splits``, ``models`` or ``metadata``); also
                the key into ``default_repos`` and the word used in messages.
            repo: HF repository id, or None to use the group's default.
            filenames: Files to download (individual failures are tolerated).
            required: Files that must exist in *dest_dir* afterwards.
            dest_dir: Local directory the files must end up in.
            token: Optional HF access token.
            repo_subdir: Folder inside the repository holding the files.

        Returns:
            True if every required file is present in *dest_dir* afterwards.
        """
        if repo is None:
            repo = self.default_repos[label]
        try:
            print(f"π Fetching {label} from {repo}...")
            os.makedirs(dest_dir, exist_ok=True)
            for name in filenames:
                repo_path = f"{repo_subdir}/{name}" if repo_subdir else name
                try:
                    downloaded = hf_hub_download(
                        repo_id=repo,
                        filename=repo_path,
                        local_dir=dest_dir,
                        token=token,
                    )
                    self._place_file(downloaded, dest_dir)
                    print(f"β Downloaded: {name}")
                except Exception as exc:
                    # Best effort per file: optional files may be absent
                    # from the repository. Required ones are verified below.
                    print(f"β οΈ Failed to download {name}: {exc}")
        except Exception as exc:
            print(f"β Failed to fetch {label}: {exc}")
            return False

        missing = self._missing_files(dest_dir, required)
        if missing:
            print(f"β Failed to fetch {label}: missing required files {missing}")
            return False
        print(f"β {label.capitalize()} fetched successfully to {dest_dir}")
        return True

    def fetch_splits_from_hf(self, repo: Optional[str] = None,
                             token: Optional[str] = None) -> bool:
        """Fetch dataset splits from HF Hub.

        Returns:
            True if all required split files are present afterwards.
        """
        return self._fetch_group(
            "splits", repo, self.SPLIT_FILES, self.REQUIRED_SPLIT_FILES,
            self.splits_dir, token, repo_subdir="splits",
        )

    def fetch_models_from_hf(self, repo: Optional[str] = None,
                             token: Optional[str] = None) -> bool:
        """Fetch trained models from HF Hub.

        Returns:
            True if all required model files are present afterwards.
        """
        return self._fetch_group(
            "models", repo, self.MODEL_FILES, self.REQUIRED_MODEL_FILES,
            self.export_dir, token,
        )

    def fetch_metadata_from_hf(self, repo: Optional[str] = None,
                               token: Optional[str] = None) -> bool:
        """Fetch metadata from HF Hub.

        Returns:
            True if all required metadata files are present afterwards.
        """
        return self._fetch_group(
            "metadata", repo, self.METADATA_FILES, self.REQUIRED_METADATA_FILES,
            self.data_dir, token, repo_subdir="metadata",
        )

    def fetch_everything_from_hf(self, splits_repo: Optional[str] = None,
                                 models_repo: Optional[str] = None,
                                 metadata_repo: Optional[str] = None,
                                 token: Optional[str] = None) -> Dict[str, bool]:
        """Fetch all artifacts from HF Hub.

        Returns:
            Mapping of group name to the result of its fetch method.
        """
        print("π Starting comprehensive artifact fetch from HF Hub...")
        results = {
            "splits": self.fetch_splits_from_hf(splits_repo, token),
            "models": self.fetch_models_from_hf(models_repo, token),
            "metadata": self.fetch_metadata_from_hf(metadata_repo, token),
        }
        # Summary
        success_count = sum(results.values())
        total_count = len(results)
        print(f"\nπ Fetch Summary: {success_count}/{total_count} successful")
        for artifact, success in results.items():
            status = "β " if success else "β"
            print(f" {status} {artifact}")
        return results

    # ------------------------------------------------------------------
    # Packages
    # ------------------------------------------------------------------
    @staticmethod
    def _extract_archive(archive_path: str, extract_to: str) -> bool:
        """Extract a ``.tar.gz``/``.tgz`` or ``.zip`` archive.

        Returns:
            True if the archive type was recognised and extracted, False
            for an unsupported file extension.

        Raises:
            ValueError: If a tar member would be written outside
                *extract_to* (only on interpreters without extraction
                filters; with filters, tarfile raises its own error).
        """
        if archive_path.endswith((".tar.gz", ".tgz")):
            with tarfile.open(archive_path, "r:gz") as tar:
                if hasattr(tarfile, "data_filter"):
                    # Built-in protection against path traversal and
                    # other unsafe members.
                    tar.extractall(extract_to, filter="data")
                else:
                    root = os.path.realpath(extract_to)
                    for member in tar.getmembers():
                        dest = os.path.realpath(os.path.join(root, member.name))
                        if dest != root and not dest.startswith(root + os.sep):
                            raise ValueError(
                                f"Unsafe path in archive: {member.name}"
                            )
                    tar.extractall(extract_to)
            return True
        if archive_path.endswith(".zip"):
            with zipfile.ZipFile(archive_path, "r") as zipf:
                zipf.extractall(extract_to)
            return True
        return False

    def download_and_extract_package(self, package_path: str,
                                     extract_to: Optional[str] = None,
                                     repo: Optional[str] = None,
                                     token: Optional[str] = None) -> bool:
        """Download and extract a package from HF Hub.

        Args:
            package_path: Package name or path; only its base name is used
                and looked up under ``packages/`` in the repository.
            extract_to: Destination directory (defaults to ``base_dir``).
            repo: HF repository id (defaults to ``DEFAULT_PACKAGE_REPO``).
            token: Optional HF access token.

        Returns:
            True if the package was downloaded and extracted; False on any
            error or when the archive type is not supported.
        """
        try:
            if extract_to is None:
                extract_to = self.base_dir
            if repo is None:
                repo = self.DEFAULT_PACKAGE_REPO
            print(f"π Downloading and extracting package: {package_path}")
            local_path = hf_hub_download(
                repo_id=repo,
                filename=f"packages/{os.path.basename(package_path)}",
                local_dir=extract_to,
                token=token,
            )
            if not self._extract_archive(local_path, extract_to):
                print(
                    "β Failed to download/extract package: "
                    f"unsupported archive type: {package_path}"
                )
                return False
            print(f"β Package extracted to {extract_to}")
            return True
        except Exception as exc:
            print(f"β Failed to download/extract package: {exc}")
            return False

    def get_fetch_status(self) -> Dict[str, Any]:
        """Get current fetch status.

        Returns:
            Availability flags per group, the result of
            ``check_artifacts_needed()``, the configured directories and
            the default repositories.
        """
        needs = self.check_artifacts_needed()
        # A group is available exactly when it does not need fetching, so
        # the flags are derived from one scan instead of re-checking files.
        return {
            "splits_available": not needs["splits"],
            "models_available": not needs["models"],
            "metadata_available": not needs["metadata"],
            "artifacts_needed": needs,
            "base_dir": self.base_dir,
            "splits_dir": self.splits_dir,
            "export_dir": self.export_dir,
            "hf_repos": self.default_repos,
        }
def create_runtime_fetcher() -> RuntimeArtifactFetcher:
    """Build a RuntimeArtifactFetcher using its default constructor arguments."""
    fetcher = RuntimeArtifactFetcher()
    return fetcher
def auto_fetch_if_needed(token: Optional[str] = None) -> Dict[str, bool]:
    """Automatically fetch artifacts if they're needed.

    Args:
        token: Optional HF access token forwarded to the fetch methods.

    Returns:
        A dict with the keys ``splits``, ``models`` and ``metadata``. A
        group that was already available maps to True; a group that had to
        be fetched maps to the result of its fetch method. All three keys
        are always present, so callers can index the result safely.
    """
    fetcher = create_runtime_fetcher()
    needs = fetcher.check_artifacts_needed()

    # One fetch method per artifact group, in the order they are reported.
    fetch_methods = {
        "splits": fetcher.fetch_splits_from_hf,
        "models": fetcher.fetch_models_from_hf,
        "metadata": fetcher.fetch_metadata_from_hf,
    }

    if not any(needs[group] for group in fetch_methods):
        print("β All artifacts are already available - no fetching needed")
        return {group: True for group in fetch_methods}

    print(f"π Auto-fetching needed artifacts (estimated size: {needs['total_size_mb']} MB)")
    # Groups that are already present are reported as True, matching the
    # "nothing needed" return value above.
    return {
        group: fetch(token=token) if needs[group] else True
        for group, fetch in fetch_methods.items()
    }
if __name__ == "__main__":
    # Smoke test: dump the current artifact status as pretty-printed JSON.
    report = json.dumps(create_runtime_fetcher().get_fetch_status(), indent=2)
    print("Current fetch status:", report)