Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Hugging Face Hub integration for Dressify. | |
| Handles uploading artifacts to specific HF repositories. | |
| """ | |
| import os | |
| import json | |
| import shutil | |
| from datetime import datetime | |
| from typing import Dict, List, Any, Optional | |
| from huggingface_hub import HfApi, create_repo, upload_file, upload_folder | |
| from pathlib import Path | |
class HFHubIntegration:
    """Integrates with Hugging Face Hub for artifact management.

    Attributes:
        api: ``HfApi`` client used for every Hub call.
        token: Access token forwarded to the Hub calls (may be ``None``).
        repos: Maps a logical name (``"models"``, ``"helper"``) to a repo id.
        repo_types: Maps each repo id to its Hub repo type. The Hub API
            defaults to ``"model"``, so dataset repos must be named here.
        repo_metadata: Descriptive metadata (description, tags, license,
            language) kept per repo id.
    """

    def __init__(self, token: Optional[str] = None):
        """Create the Hub client and register the Dressify repositories.

        Args:
            token: Hugging Face access token; ``None`` lets ``HfApi`` fall
                back to its own default token lookup.
        """
        self.api = HfApi(token=token)
        self.token = token

        # Your specific repositories
        self.repos = {
            "models": "Stylique/dressify-models",
            "helper": "Stylique/Dressify-Helper",
        }

        # Explicit repo types: every Hub call defaults to "model", which
        # would silently target the wrong namespace for the dataset repo.
        self.repo_types = {
            "Stylique/dressify-models": "model",
            "Stylique/Dressify-Helper": "dataset",
        }

        # Repository descriptions and metadata
        self.repo_metadata = {
            "Stylique/dressify-models": {
                "description": "Dressify trained models and checkpoints for outfit recommendation",
                "tags": ["computer-vision", "fashion", "outfit-recommendation", "deep-learning"],
                "license": "mit",
                "language": "en",
            },
            "Stylique/Dressify-Helper": {
                "description": "Dressify dataset splits, metadata, and helper files",
                "tags": ["dataset", "fashion", "outfit-recommendation", "polyvore"],
                "license": "mit",
                "language": "en",
            },
        }

    def _repo_type(self, repo_id: str) -> str:
        """Return the Hub repo type (``"model"`` or ``"dataset"``) for *repo_id*."""
        known_types = getattr(self, "repo_types", {})
        if repo_id in known_types:
            return known_types[repo_id]
        # Fallback for repo ids registered after construction: same
        # name-based heuristic the class has always used.
        return "model" if "models" in repo_id else "dataset"

    def _upload_files(
        self,
        base_dir: str,
        filenames: List[str],
        repo_id: str,
        path_prefix: str = "",
    ) -> tuple:
        """Upload each existing file of *filenames* found in *base_dir*.

        Files that do not exist locally are skipped without a record. A
        failing upload is recorded and does not stop the remaining files.

        Returns:
            ``(records, total_bytes)`` where *records* is a list of per-file
            status dicts and *total_bytes* is the size of the files that
            uploaded successfully.
        """
        repo_type = self._repo_type(repo_id)
        records = []
        total_bytes = 0
        for name in filenames:
            local_path = os.path.join(base_dir, name)
            if not os.path.exists(local_path):
                continue
            try:
                self.api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=f"{path_prefix}{name}",
                    repo_id=repo_id,
                    repo_type=repo_type,
                    token=self.token,
                )
                size_bytes = os.path.getsize(local_path)
            except Exception as e:
                records.append({
                    "name": name,
                    "status": "failed",
                    "error": str(e),
                })
                print(f"❌ Failed to upload {name}: {e}")
            else:
                size_mb = round(size_bytes / (1024 * 1024), 2)
                total_bytes += size_bytes
                records.append({
                    "name": name,
                    "size_mb": size_mb,
                    "status": "uploaded",
                })
                print(f"✅ Uploaded: {name} ({size_mb} MB)")
        return records, total_bytes

    @staticmethod
    def _file_upload_result(repository: str, records: List[Dict[str, Any]], total_bytes: int) -> Dict[str, Any]:
        """Build the result dict shared by the per-file upload methods."""
        failed = [rec["name"] for rec in records if rec["status"] == "failed"]
        result = {
            # A run in which any attempted upload raised is not a success.
            "success": not failed,
            "repository": repository,
            "uploaded_files": records,
            # Round once on the byte total so per-file rounding error
            # does not accumulate.
            "total_size_mb": round(total_bytes / (1024 * 1024), 2),
            "timestamp": datetime.now().isoformat(),
        }
        if failed:
            result["error"] = f"Failed to upload: {', '.join(failed)}"
        return result

    def ensure_repos_exist(self) -> Dict[str, bool]:
        """Ensure all required repositories exist, create if they don't.

        Returns:
            Mapping of repo id to ``True`` when the repo exists or was
            created, ``False`` when creation failed.
        """
        results = {}
        for repo_id in self.repos.values():
            repo_type = self._repo_type(repo_id)
            try:
                # Try to get repo info (with the right type, otherwise a
                # dataset repo is looked up as a model and never found).
                self.api.repo_info(repo_id, repo_type=repo_type)
            except Exception:
                # Lookup failed (missing repo, no access, network): try to create.
                try:
                    # create_repo accepts no description/license/tags
                    # arguments; that metadata belongs in the repo card.
                    # exist_ok avoids a conflict error when the lookup
                    # failure was transient and the repo is already there.
                    self.api.create_repo(
                        repo_id=repo_id,
                        repo_type=repo_type,
                        token=self.token,
                        exist_ok=True,
                    )
                    results[repo_id] = True
                    print(f"✅ Created repository: {repo_id}")
                except Exception as e:
                    results[repo_id] = False
                    print(f"❌ Failed to create repository {repo_id}: {e}")
            else:
                results[repo_id] = True
                print(f"✅ Repository exists: {repo_id}")
        return results

    def upload_models_to_hf(self, models_dir: Optional[str] = None) -> Dict[str, Any]:
        """Upload trained models to the models repository.

        Args:
            models_dir: Directory holding the exported models. Defaults to
                the ``EXPORT_DIR`` environment variable, then
                ``models/exports``.

        Returns:
            Result dict with ``success``, ``repository``, ``uploaded_files``,
            ``total_size_mb`` and ``timestamp``; on error a dict with
            ``success`` False and an ``error`` message.
        """
        if models_dir is None:
            models_dir = os.getenv("EXPORT_DIR", "models/exports")
        if not os.path.exists(models_dir):
            return {"success": False, "error": f"Models directory not found: {models_dir}"}

        try:
            repo_id = self.repos['models']
            print(f"🚀 Uploading models to {repo_id}...")

            # Files to upload
            model_files = [
                "resnet_item_embedder_best.pth",
                "vit_outfit_model_best.pth",
                "resnet_metrics.json",
                "vit_metrics.json",
            ]
            records, total_bytes = self._upload_files(models_dir, model_files, repo_id)

            # Create model card
            self._create_model_card()

            result = self._file_upload_result(repo_id, records, total_bytes)
            if result["success"]:
                print(f"🎉 Models upload completed! Total size: {result['total_size_mb']} MB")
            else:
                print(f"⚠️ Models upload finished with errors. {result['error']}")
            return result
        except Exception as e:
            return {"success": False, "error": str(e)}

    def upload_splits_to_hf(self, splits_dir: Optional[str] = None) -> Dict[str, Any]:
        """Upload dataset splits to the helper repository.

        Args:
            splits_dir: Directory to upload. Defaults to ``splits`` under
                the ``POLYVORE_ROOT`` environment variable (or its default).

        Returns:
            Result dict with ``success``, ``repository``,
            ``uploaded_folder``, ``total_size_mb`` and ``timestamp``; on
            error a dict with ``success`` False and an ``error`` message.
        """
        if splits_dir is None:
            splits_dir = os.path.join(os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore"), "splits")
        if not os.path.exists(splits_dir):
            return {"success": False, "error": f"Splits directory not found: {splits_dir}"}

        try:
            repo_id = self.repos['helper']
            print(f"🚀 Uploading splits to {repo_id}...")

            # Upload entire splits directory
            self.api.upload_folder(
                folder_path=splits_dir,
                path_in_repo="splits",
                repo_id=repo_id,
                repo_type=self._repo_type(repo_id),
                token=self.token,
            )

            # Calculate total size
            total_size = sum(
                os.path.getsize(os.path.join(root, name))
                for root, _dirs, names in os.walk(splits_dir)
                for name in names
            )
            total_size_mb = round(total_size / (1024 * 1024), 2)

            result = {
                "success": True,
                "repository": repo_id,
                "uploaded_folder": "splits",
                "total_size_mb": total_size_mb,
                "timestamp": datetime.now().isoformat(),
            }
            print(f"🎉 Splits upload completed! Total size: {total_size_mb} MB")
            return result
        except Exception as e:
            return {"success": False, "error": str(e)}

    def upload_metadata_to_hf(self, data_dir: Optional[str] = None) -> Dict[str, Any]:
        """Upload metadata files to the helper repository.

        Args:
            data_dir: Directory holding the metadata files. Defaults to the
                ``POLYVORE_ROOT`` environment variable (or its default).

        Returns:
            Result dict with ``success``, ``repository``, ``uploaded_files``,
            ``total_size_mb`` and ``timestamp``; on error a dict with
            ``success`` False and an ``error`` message.
        """
        if data_dir is None:
            data_dir = os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore")
        if not os.path.exists(data_dir):
            return {"success": False, "error": f"Data directory not found: {data_dir}"}

        try:
            repo_id = self.repos['helper']
            print(f"🚀 Uploading metadata to {repo_id}...")

            # Metadata files to upload
            metadata_files = [
                "polyvore_item_metadata.json",
                "polyvore_outfit_titles.json",
                "categories.csv",
            ]
            # Upload to metadata subfolder
            records, total_bytes = self._upload_files(
                data_dir, metadata_files, repo_id, path_prefix="metadata/"
            )

            result = self._file_upload_result(repo_id, records, total_bytes)
            if result["success"]:
                print(f"🎉 Metadata upload completed! Total size: {result['total_size_mb']} MB")
            else:
                print(f"⚠️ Metadata upload finished with errors. {result['error']}")
            return result
        except Exception as e:
            return {"success": False, "error": str(e)}

    def upload_everything_to_hf(self) -> Dict[str, Any]:
        """Upload all artifacts to HF Hub.

        Returns:
            Dict with one result dict per category (``models``, ``splits``,
            ``metadata``), a ``timestamp`` and an overall ``success`` flag;
            or ``{"success": False, "error": ...}`` when the repositories
            could not be ensured.
        """
        print("🚀 Starting comprehensive upload to HF Hub...")

        # Ensure repositories exist
        repo_status = self.ensure_repos_exist()
        if not all(repo_status.values()):
            return {"success": False, "error": "Failed to ensure repositories exist"}

        # Upload everything
        results = {
            "models": self.upload_models_to_hf(),
            "splits": self.upload_splits_to_hf(),
            "metadata": self.upload_metadata_to_hf(),
            "timestamp": datetime.now().isoformat(),
        }

        # Summary (only the per-category dicts count, not the timestamp)
        category_results = {
            category: outcome
            for category, outcome in results.items()
            if isinstance(outcome, dict)
        }
        success_count = sum(
            1 for outcome in category_results.values() if outcome.get("success", False)
        )
        total_count = len(category_results)
        print(f"\n📊 Upload Summary: {success_count}/{total_count} successful")
        for category, outcome in category_results.items():
            status = "✅" if outcome.get("success", False) else "❌"
            print(f" {status} {category}")

        # Overall flag, mirroring the key the early-failure return already has.
        results["success"] = success_count == total_count
        return results

    def _create_model_card(self):
        """Create a model card for the models repository.

        The card is uploaded as ``README.md`` straight from memory, so no
        temporary file is written to (or left behind in) the working
        directory. Upload failures are reported but not raised.
        """
        model_card_content = """---
language: en
license: mit
tags:
- computer-vision
- fashion
- outfit-recommendation
- deep-learning
- resnet
- vision-transformer
---
# Dressify Outfit Recommendation Models
This repository contains the trained models for the Dressify outfit recommendation system.
## Models
### ResNet Item Embedder
- **Architecture**: ResNet50 with custom projection head
- **Purpose**: Generate 512-dimensional embeddings for fashion items
- **Training**: Triplet loss with semi-hard negative mining
- **Input**: Fashion item images (224x224)
- **Output**: L2-normalized 512D embeddings
### ViT Outfit Compatibility Model
- **Architecture**: Vision Transformer encoder
- **Purpose**: Score outfit compatibility from item embeddings
- **Training**: Triplet loss with cosine distance
- **Input**: Variable-length sequence of item embeddings
- **Output**: Compatibility score (0-1)
## Usage
```python
from huggingface_hub import hf_hub_download
import torch
# Download models
resnet_path = hf_hub_download(repo_id="Stylique/dressify-models", filename="resnet_item_embedder_best.pth")
vit_path = hf_hub_download(repo_id="Stylique/dressify-models", filename="vit_outfit_model_best.pth")
# Load models
resnet_model = torch.load(resnet_path)
vit_model = torch.load(vit_path)
```
## Training Details
- **Dataset**: Polyvore Outfits (Stylique/Polyvore)
- **Loss**: Triplet margin loss
- **Optimizer**: AdamW
- **Mixed Precision**: Enabled
- **Hardware**: NVIDIA GPU with CUDA
## Performance
- **ResNet**: ~25M parameters, fast inference
- **ViT**: ~12M parameters, efficient outfit scoring
- **Memory**: Optimized for deployment on Hugging Face Spaces
## Citation
If you use these models in your research, please cite:
```bibtex
@misc{dressify2024,
title={Dressify: Deep Learning for Fashion Outfit Recommendation},
author={Stylique},
year={2024},
url={https://huggingface.co/Stylique/dressify-models}
}
```
"""
        # Upload model card
        repo_id = self.repos['models']
        try:
            self.api.upload_file(
                # upload_file accepts raw bytes, so no temp file is needed.
                path_or_fileobj=model_card_content.encode("utf-8"),
                path_in_repo="README.md",
                repo_id=repo_id,
                repo_type=self._repo_type(repo_id),
                token=self.token,
            )
            print("✅ Model card uploaded")
        except Exception as e:
            print(f"⚠️ Failed to upload model card: {e}")

    def get_upload_status(self) -> Dict[str, Any]:
        """Get current upload status and repository information.

        Returns:
            Dict with a ``repositories`` mapping (repo id to ``exists``,
            ``last_modified``, ``size``) plus ``last_upload`` and
            ``total_uploads``, which this method never updates and always
            returns as ``None`` and ``0``.
        """
        status = {
            "repositories": {},
            "last_upload": None,
            "total_uploads": 0,
        }
        for repo_id in self.repos.values():
            try:
                repo_info = self.api.repo_info(repo_id, repo_type=self._repo_type(repo_id))
                last_modified = repo_info.last_modified
                status["repositories"][repo_id] = {
                    "exists": True,
                    "last_modified": last_modified.isoformat() if last_modified else None,
                    "size": getattr(repo_info, 'size_on_disk', None),
                }
            except Exception:
                # Any lookup failure is reported as "does not exist".
                status["repositories"][repo_id] = {
                    "exists": False,
                    "last_modified": None,
                    "size": None,
                }
        return status
def create_hf_integration(token: str = None) -> HFHubIntegration:
    """Build and return an ``HFHubIntegration`` configured with *token*.

    Convenience factory: the token is passed through unchanged to the
    ``HFHubIntegration`` constructor.
    """
    integration = HFHubIntegration(token=token)
    return integration