Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Startup Fix Script for Dressify | |
| Handles dataset preparation issues and ensures system startup | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import time | |
| from pathlib import Path | |
def check_dataset_status():
    """Report which parts of the Polyvore dataset exist under the working directory.

    Inspects ``<cwd>/data/Polyvore`` and prints a summary of the ``images/``
    and ``splits/`` folders, plus any ``train``/``valid``/``test`` JSON files
    found directly inside ``nondisjoint/`` or ``disjoint/``.

    Returns:
        ``True`` when ``images/`` is non-empty and ``splits/`` holds at least
        one ``*.json`` file, the string ``"needs_splits"`` when only the
        images are present, and ``False`` otherwise (including when the
        dataset directory itself is missing).
    """
    print("π Checking dataset status...")
    root = os.path.abspath(os.path.join(os.getcwd(), "data", "Polyvore"))
    if not os.path.exists(root):
        print(f"β Dataset directory not found: {root}")
        return False

    # Key components: a populated images folder and at least one JSON split.
    images_dir = os.path.join(root, "images")
    splits_dir = os.path.join(root, "splits")
    has_images = os.path.isdir(images_dir) and any(Path(images_dir).glob("*"))
    has_splits = os.path.isdir(splits_dir) and any(Path(splits_dir).glob("*.json"))

    images_mark = 'β ' if has_images else 'β'
    splits_mark = 'β ' if has_splits else 'β'
    print(f"π Dataset root: {root}")
    print(f"πΌοΈ Images: {images_mark} ({images_dir})")
    print(f"π Splits: {splits_mark} ({splits_dir})")

    # Collect the official split files; this listing is informational only
    # and does not influence the return value.
    official_splits = []
    for location in ("nondisjoint", "disjoint"):
        location_path = os.path.join(root, location)
        if not os.path.exists(location_path):
            continue
        for split_name in ("train", "valid", "test"):
            split_file = os.path.join(location_path, f"{split_name}.json")
            if not os.path.exists(split_file):
                continue
            size_mb = os.path.getsize(split_file) / (1024 * 1024)
            official_splits.append(f"{location}/{split_name}.json ({size_mb:.1f} MB)")

    if official_splits:
        print("π― Official splits found:")
        for entry in official_splits:
            print(f" β {entry}")

    if not has_images:
        print("β Dataset incomplete - needs full preparation")
        return False
    if not has_splits:
        print("β οΈ Images present but splits missing - will create splits from official data")
        return "needs_splits"
    print("β Dataset is ready!")
    return True
def prepare_dataset():
    """Fetch the dataset and run the Polyvore preparation script.

    Two stages, each wrapped in its own broad ``try`` so that any failure is
    reported and turned into a ``False`` result rather than an exception:

    1. Import and call ``utils.data_fetch.ensure_dataset_ready``.
    2. Run ``scripts/prepare_polyvore.py`` (or ``prepare_polyvore.py`` from
       the working directory) in a subprocess with ``--root`` pointing at
       ``<cwd>/data/Polyvore``. ``--force_random_split`` is deliberately not
       passed.

    Returns:
        ``True`` if the preparation subprocess exits with status 0, otherwise
        ``False``.
    """
    print("\nπ Preparing dataset...")
    root = os.path.abspath(os.path.join(os.getcwd(), "data", "Polyvore"))

    # Stage 1: make sure the raw data has been fetched.
    try:
        print("π₯ Running data fetcher...")
        from utils.data_fetch import ensure_dataset_ready
        dataset_root = ensure_dataset_ready()
        if not dataset_root:
            print("β Data fetcher failed")
            return False
        print(f"β Data fetcher completed: {dataset_root}")
    except Exception as e:
        print(f"β Data fetcher error: {e}")
        return False

    # Stage 2: run the preparation script (without random splits).
    try:
        print("π§ Running dataset preparation...")

        # Take the first candidate that exists; when none does, the loop
        # variable is left on the last candidate, which is the one reported.
        for prep_script in ("scripts/prepare_polyvore.py", "prepare_polyvore.py"):
            if os.path.exists(prep_script):
                break
        else:
            print(f"β Prepare script not found: {prep_script}")
            return False

        # Note: NOT using --force_random_split
        cmd = [sys.executable, prep_script, "--root", root]
        print(f"π§ Running: {' '.join(cmd)}")
        print("π― This will use official splits from nondisjoint/ and disjoint/ folders")

        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
        if result.returncode == 0:
            print("β Dataset preparation completed successfully!")
            print("π Output:")
            print(result.stdout)
            return True

        print("β Dataset preparation failed!")
        print("π Error output:")
        print(result.stderr)
        print("π Standard output:")
        print(result.stdout)

        # Give a targeted hint when the failure is due to missing official splits.
        marker = "No official splits found"
        if any(marker in stream for stream in (result.stderr, result.stdout)):
            hint_lines = (
                "\nπ§ Issue: Official splits not found in nondisjoint/ or disjoint/ folders",
                "π Expected structure:",
                " data/Polyvore/",
                " βββ nondisjoint/",
                " β βββ train.json",
                " β βββ valid.json",
                " β βββ test.json",
                " βββ disjoint/",
                " β βββ train.json",
                " β βββ valid.json",
                " β βββ test.json",
                " βββ images/",
                "\nπ‘ Solution: The dataset should have been downloaded with official splits.",
                " Check if the Hugging Face download completed successfully.",
            )
            for line in hint_lines:
                print(line)
        return False
    except Exception as e:
        print(f"β Dataset preparation error: {e}")
        return False
def verify_splits():
    """Check that the expected training split files exist.

    Looks in ``<cwd>/data/Polyvore/splits`` for ``train.json``,
    ``outfits_train.json`` and ``outfit_triplets_train.json``, printing the
    size of each file found and flagging each one that is missing.

    Returns:
        ``True`` when the directory and all three files exist, ``False``
        otherwise.
    """
    print("\nπ Verifying splits...")
    splits_dir = os.path.join(
        os.path.abspath(os.path.join(os.getcwd(), "data", "Polyvore")),
        "splits",
    )
    if not os.path.exists(splits_dir):
        print("β Splits directory not found")
        return False

    missing_files = []
    for file_name in ("train.json", "outfits_train.json", "outfit_triplets_train.json"):
        file_path = os.path.join(splits_dir, file_name)
        if not os.path.exists(file_path):
            print(f"β {file_name}: Missing")
            missing_files.append(file_name)
            continue
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f"β {file_name}: {size_mb:.1f} MB")

    if missing_files:
        print(f"β Missing required files: {missing_files}")
        return False

    print("β All required splits verified!")
    return True
def test_training_scripts():
    """Smoke-check that the model modules used for training can be imported.

    Only imports are attempted (``models.resnet_embedder.ResNetItemEmbedder``
    and ``models.vit_outfit.OutfitCompatibilityModel``); no training code is
    executed.

    Returns:
        ``True`` when both imports succeed, ``False`` as soon as one fails.
    """
    print("\nπ§ͺ Testing training scripts...")

    # ResNet item embedder
    try:
        print("π§ Testing ResNet training script...")
        from models.resnet_embedder import ResNetItemEmbedder
        print("β ResNet model imports successfully")
    except Exception as exc:
        print(f"β ResNet model import failed: {exc}")
        return False

    # ViT outfit compatibility model
    try:
        print("π§ Testing ViT training script...")
        from models.vit_outfit import OutfitCompatibilityModel
        print("β ViT model imports successfully")
    except Exception as exc:
        print(f"β ViT model import failed: {exc}")
        return False

    print("β All training scripts tested successfully!")
    return True
def create_quick_start_script(script_path="quick_start.sh"):
    """Write an executable bash helper that prepares the dataset and trains.

    The generated script runs ``python startup_fix.py`` when
    ``data/Polyvore/splits/train.json`` is absent and then launches a
    3-epoch ``train_resnet.py`` run.

    Args:
        script_path: Destination of the generated script. Defaults to
            ``quick_start.sh`` relative to the current working directory.

    Raises:
        OSError: If the file cannot be written or its mode cannot be changed.
    """
    script_content = """#!/bin/bash
# Quick Start Script for Dressify
# This script will prepare the dataset and start training
echo "π Dressify Quick Start"
echo "========================"
# Check if dataset is ready
if [ -d "data/Polyvore/splits" ] && [ -f "data/Polyvore/splits/train.json" ]; then
echo "β Dataset is ready!"
else
echo "π§ Preparing dataset..."
python startup_fix.py
fi
# Start quick training
echo "π― Starting quick training..."
python train_resnet.py --data_root data/Polyvore --epochs 3 --out models/exports/resnet_quick.pth
echo "π Quick start completed!"
echo "π Check models/exports/ for trained models"
"""
    # The script text contains non-ASCII characters, so the encoding is pinned
    # instead of relying on the locale default (which can raise
    # UnicodeEncodeError). newline="\n" prevents CRLF translation, which
    # would corrupt the shebang line for bash.
    with open(script_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(script_content)

    # Make executable
    os.chmod(script_path, 0o755)
    print(f"π Created quick start script: {script_path}")
def main():
    """Run the startup fix: check the dataset, then prepare and verify it.

    If ``check_dataset_status()`` reports the dataset as ready, nothing else
    is done. For any other status the dataset is prepared with
    ``prepare_dataset()`` and the result is confirmed with
    ``verify_splits()``.

    Returns:
        ``True`` when the dataset is ready (either already, or after a
        successful prepare + verify), ``False`` otherwise.
    """
    print("π Dressify Startup Fix")
    print("=" * 50)

    status = check_dataset_status()
    if status is True:
        print("β System is ready to go!")
        return True

    # Both remaining states need the same prepare/verify sequence; only the
    # announcement differs.
    if status == "needs_splits":
        print("π§ Dataset needs splits created from official data...")
    else:
        print("π§ Dataset needs full preparation...")

    if not prepare_dataset():
        print("β Dataset preparation failed")
        return False

    if not verify_splits():
        print("β Split verification failed")
        return False

    print("β Dataset preparation completed successfully!")
    return True
if __name__ == "__main__":
    # Script entry point. The process exit status reflects the outcome so
    # that shell callers can detect failure:
    #   0   - startup fix succeeded
    #   1   - startup fix failed or an unexpected error occurred
    #   130 - interrupted by the user (Ctrl+C)
    exit_code = 1
    try:
        success = main()
        if success:
            print("\nπ Startup fix completed successfully!")
            print("π Your Dressify system is ready to use!")

            # Create quick start script
            create_quick_start_script()

            print("\nπ Next steps:")
            print("1. Run: python app.py")
            print("2. Or use: ./quick_start.sh")
            print("3. Check the Advanced Training tab for parameter controls")
            exit_code = 0
        else:
            print("\nβ Startup fix failed!")
            print("π§ Please check the error messages above")
            print("π Contact support if issues persist")
    except KeyboardInterrupt:
        print("\nβΉοΈ Startup fix interrupted by user")
        exit_code = 130
    except Exception as e:
        # Top-level boundary: report the full traceback instead of letting
        # the interpreter print a bare crash.
        print(f"\nπ₯ Unexpected error: {e}")
        import traceback
        traceback.print_exc()
    sys.exit(exit_code)