Spaces:

Stylique
/

recomendation

Paused

App Files Files Community

Ali Mohsin committed on Sep 8, 2025

Commit

4a5ec80

1 Parent(s): 25bdf34

Updated new changes

Browse files

Files changed (2) hide show

app.py +61 -10
inference.py +127 -36

app.py CHANGED Viewed

@@ -256,8 +256,8 @@ def _background_bootstrap():
             import sys
             argv_bak = sys.argv
             try:
-                # Use official splits from nondisjoint/ and disjoint/ folders with default size limit (160 samples)
-                sys.argv = ["prepare_polyvore.py", "--root", ds_root, "--max_samples", "160"]
                 prepare_main()
             finally:
                 sys.argv = argv_bak
@@ -390,6 +390,20 @@ def _stitch_strip(imgs: List[Image.Image], height: int = 256, pad: int = 6, bg=(
 def gradio_recommend(files: List[str], occasion: str, weather: str, num_outfits: int):
     # Return stitched outfit images and a JSON with details
     if not files:
         return [], {"error": "No files uploaded"}
@@ -402,6 +416,11 @@ def gradio_recommend(files: List[str], occasion: str, weather: str, num_outfits:
         for i in range(len(images))
     ]
     res = service.compose_outfits(items, context={"occasion": occasion, "weather": weather, "num_outfits": int(num_outfits)})
     # Prepare stitched previews
     strips: List[Image.Image] = []
     for r in res:
@@ -595,7 +614,19 @@ def start_training_advanced(
                 log_message += "🎉 All training completed! Models saved to models/exports/\n"
                 log_message += "🔄 Reloading models for inference...\n"
                 service.reload_models()
-                log_message += "✅ Models reloaded and ready for inference!\n"
                 # Auto-upload to HF Hub if token is available
                 hf_token = os.getenv("HF_TOKEN")
@@ -689,7 +720,21 @@ def start_training_simple(dataset_size: str, res_epochs: int, vit_epochs: int):
                 log_message += f"❌ ViT training failed: {vit_result.stderr}\n"
                 return log_message
             service.reload_models()
-            log_message += "\nDone. Artifacts in models/exports."
             # Auto-upload to HF Hub if token is available
             hf_token = os.getenv("HF_TOKEN")
@@ -740,12 +785,12 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
         with gr.Row():
             gr.Markdown("#### 📊 **Current Behavior**")
-            gr.Markdown("• **Bootstrap**: Downloads full dataset (53K outfits) + generates splits with **160 samples by default**\n• **Training**: Uses 160 samples (ultra-fast testing!)\n• **Apply Button**: Regenerates splits with your selected size limit")
         with gr.Row():
             global_dataset_size = gr.Dropdown(
                 choices=["160", "2000", "5000", "10000", "25000", "50000", "full"],
-                value="160",
                 label="Global Dataset Size (Affects Prep + Training)"
             )
             gr.Markdown("**160**: Ultra-fast testing (~30 sec prep, ~1-2 min training)\n**2000**: Fast testing (~1-2 min prep, ~2-5 min training)\n**5000**: Fast testing (~2-3 min prep, ~5-10 min training)\n**10000**: Good testing (~3-5 min prep, ~10-20 min training)\n**full**: Production (~5-10 min prep, ~1-4 hours training)")
@@ -753,11 +798,11 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
         with gr.Row():
             # Apply dataset size button
             apply_size_btn = gr.Button("🔄 Apply Dataset Size & Regenerate Splits", variant="primary")
-            size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 160 samples (click Apply to regenerate splits)", interactive=False)
             # Current dataset info
             gr.Markdown("#### 📊 **Current Dataset Status**")
-            gr.Markdown("• **Full dataset downloaded**: 53,306 outfits (required for system)\n• **Splits generated**: **160 samples by default** (ultra-fast testing!)\n• **Training will use**: 160 samples (ultra-fast!)\n• **Scale up**: Use Apply button to increase to larger sizes")
         def apply_dataset_size(size: str):
             """Apply global dataset size and regenerate splits."""
@@ -810,7 +855,7 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
                 gr.Markdown("#### 📊 Dataset Size Control")
                 gr.Markdown("Start small for testing, increase for production training")
                 dataset_size = gr.Dropdown(
-                    choices=["2000", "5000", "10000", "25000", "50000", "full"],
                     value="2000",
                     label="Training Dataset Size"
                 )
@@ -1003,7 +1048,7 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
                 gr.Markdown("#### 📊 Dataset Size Control")
                 gr.Markdown("Start small for testing, increase for production training")
                 dataset_size = gr.Dropdown(
-                    choices=["2000", "5000", "10000", "25000", "50000", "full"],
                     value="2000",
                     label="Training Dataset Size"
                 )
@@ -1032,6 +1077,12 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
         refresh_status = gr.Button("🔄 Refresh Status")
         refresh_status.click(fn=lambda: BOOT_STATUS, inputs=[], outputs=status)
         # System info
         gr.Markdown("#### 💻 System Information")
         device_info = gr.Textbox(label="Device", value=lambda: f"Device: {service.device}")

             import sys
             argv_bak = sys.argv
             try:
+                # Use official splits from nondisjoint/ and disjoint/ folders with default size limit (2000 samples for better early stopping)
+                sys.argv = ["prepare_polyvore.py", "--root", ds_root, "--max_samples", "2000"]
                 prepare_main()
             finally:
                 sys.argv = argv_bak
 def gradio_recommend(files: List[str], occasion: str, weather: str, num_outfits: int):
+    # Check model status first
+    model_status = service.get_model_status()
+    if not model_status["can_recommend"]:
+        error_msg = "❌ Models not ready for recommendations!\n\n"
+        error_msg += "**Model Status:**\n"
+        error_msg += f"- ResNet: {'✅ Loaded' if model_status['resnet_loaded'] else '❌ Not loaded'}\n"
+        error_msg += f"- ViT: {'✅ Loaded' if model_status['vit_loaded'] else '❌ Not loaded'}\n\n"
+        error_msg += "**Errors:**\n"
+        for error in model_status["errors"]:
+            error_msg += f"- {error}\n\n"
+        error_msg += "**Solution:**\n"
+        error_msg += "Please train the models first using the 'Simple Training' or 'Advanced Training' tabs, or ensure trained checkpoints are available."
+        return [], {"error": error_msg, "model_status": model_status}
     # Return stitched outfit images and a JSON with details
     if not files:
         return [], {"error": "No files uploaded"}
         for i in range(len(images))
     ]
     res = service.compose_outfits(items, context={"occasion": occasion, "weather": weather, "num_outfits": int(num_outfits)})
+    # Check if compose_outfits returned an error
+    if res and isinstance(res[0], dict) and "error" in res[0]:
+        return [], res[0]
     # Prepare stitched previews
     strips: List[Image.Image] = []
     for r in res:
                 log_message += "🎉 All training completed! Models saved to models/exports/\n"
                 log_message += "🔄 Reloading models for inference...\n"
                 service.reload_models()
+                # Check if models loaded successfully
+                model_status = service.get_model_status()
+                if model_status["can_recommend"]:
+                    log_message += "✅ Models reloaded and ready for inference!\n"
+                    log_message += "🎉 You can now generate outfit recommendations!\n"
+                else:
+                    log_message += "⚠️ Models reloaded but validation failed!\n"
+                    log_message += "**Model Status:**\n"
+                    log_message += f"- ResNet: {'✅ Loaded' if model_status['resnet_loaded'] else '❌ Failed'}\n"
+                    log_message += f"- ViT: {'✅ Loaded' if model_status['vit_loaded'] else '❌ Failed'}\n"
+                    for error in model_status["errors"]:
+                        log_message += f"- {error}\n"
                 # Auto-upload to HF Hub if token is available
                 hf_token = os.getenv("HF_TOKEN")
                 log_message += f"❌ ViT training failed: {vit_result.stderr}\n"
                 return log_message
             service.reload_models()
+            # Check if models loaded successfully
+            model_status = service.get_model_status()
+            if model_status["can_recommend"]:
+                log_message += "\n✅ Training completed! Models reloaded and ready for inference.\n"
+                log_message += "🎉 You can now generate outfit recommendations!\n"
+            else:
+                log_message += "\n⚠️ Training completed but models failed to load properly!\n"
+                log_message += "**Model Status:**\n"
+                log_message += f"- ResNet: {'✅ Loaded' if model_status['resnet_loaded'] else '❌ Failed'}\n"
+                log_message += f"- ViT: {'✅ Loaded' if model_status['vit_loaded'] else '❌ Failed'}\n"
+                for error in model_status["errors"]:
+                    log_message += f"- {error}\n"
+            log_message += "\nArtifacts saved to models/exports/"
             # Auto-upload to HF Hub if token is available
             hf_token = os.getenv("HF_TOKEN")
         with gr.Row():
             gr.Markdown("#### 📊 **Current Behavior**")
+            gr.Markdown("• **Bootstrap**: Downloads full dataset (53K outfits) + generates splits with **2000 samples by default**\n• **Training**: Uses 2000 samples (good for early stopping demonstration!)\n• **Apply Button**: Regenerates splits with your selected size limit")
         with gr.Row():
             global_dataset_size = gr.Dropdown(
                 choices=["160", "2000", "5000", "10000", "25000", "50000", "full"],
+                value="2000",
                 label="Global Dataset Size (Affects Prep + Training)"
             )
             gr.Markdown("**160**: Ultra-fast testing (~30 sec prep, ~1-2 min training)\n**2000**: Fast testing (~1-2 min prep, ~2-5 min training)\n**5000**: Fast testing (~2-3 min prep, ~5-10 min training)\n**10000**: Good testing (~3-5 min prep, ~10-20 min training)\n**full**: Production (~5-10 min prep, ~1-4 hours training)")
         with gr.Row():
             # Apply dataset size button
             apply_size_btn = gr.Button("🔄 Apply Dataset Size & Regenerate Splits", variant="primary")
+            size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 2000 samples (click Apply to regenerate splits)", interactive=False)
             # Current dataset info
             gr.Markdown("#### 📊 **Current Dataset Status**")
+            gr.Markdown("• **Full dataset downloaded**: 53,306 outfits (required for system)\n• **Splits generated**: **2000 samples by default** (good for early stopping!)\n• **Training will use**: 2000 samples (good for early stopping demonstration!)\n• **Scale up**: Use Apply button to increase to larger sizes")
         def apply_dataset_size(size: str):
             """Apply global dataset size and regenerate splits."""
                 gr.Markdown("#### 📊 Dataset Size Control")
                 gr.Markdown("Start small for testing, increase for production training")
                 dataset_size = gr.Dropdown(
+                    choices=["160", "2000", "5000", "10000", "25000", "50000", "full"],
                     value="2000",
                     label="Training Dataset Size"
                 )
                 gr.Markdown("#### 📊 Dataset Size Control")
                 gr.Markdown("Start small for testing, increase for production training")
                 dataset_size = gr.Dropdown(
+                    choices=["160", "2000", "5000", "10000", "25000", "50000", "full"],
                     value="2000",
                     label="Training Dataset Size"
                 )
         refresh_status = gr.Button("🔄 Refresh Status")
         refresh_status.click(fn=lambda: BOOT_STATUS, inputs=[], outputs=status)
+        # Model Status
+        gr.Markdown("#### 🤖 Model Status")
+        model_status = gr.JSON(label="Model Loading Status", value=lambda: service.get_model_status())
+        refresh_models = gr.Button("🔄 Refresh Model Status")
+        refresh_models.click(fn=lambda: service.get_model_status(), inputs=[], outputs=model_status)
         # System info
         gr.Markdown("#### 💻 System Information")
         device_info = gr.Textbox(label="Device", value=lambda: f"Device: {service.device}")

inference.py CHANGED Viewed

@@ -27,20 +27,43 @@ class InferenceService:
         self.embed_dim = int(os.getenv("EMBED_DIM", "512"))
         self.resnet_version = "resnet_v1"
         self.vit_version = "vit_v1"
-        self.resnet = self._load_resnet().to(self.device).eval()
-        self.vit = self._load_vit().to(self.device).eval()
         for m in [self.resnet, self.vit]:
-            for p in m.parameters():
-                p.requires_grad_(False)
-    def _load_resnet(self) -> nn.Module:
         strategy = os.getenv("MODEL_LOAD_STRATEGY", "state_dict")
         ckpt_path = os.getenv("RESNET_CHECKPOINT", "models/exports/resnet_item_embedder.pth")
-        model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
         if strategy == "random":
-            return model
         # Try to download from Hugging Face Hub first
         try:
@@ -52,34 +75,48 @@ class InferenceService:
                 local_dir_use_symlinks=False
             )
             print(f"📥 Downloaded ResNet from HF Hub: {hf_path}")
             state = torch.load(hf_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
-            return model
         except Exception as e:
             print(f"❌ Failed to download ResNet from HF Hub: {e}")
-            print("⚠️ WARNING: Using untrained ResNet model!")
-            print("🚨 Recommendations will not be meaningful without trained weights!")
-        # Fallback to local checkpoints
         best_path = os.path.join(os.path.dirname(ckpt_path), "resnet_item_embedder_best.pth")
         if os.path.exists(best_path):
-            ckpt_to_use = best_path
-        else:
-            ckpt_to_use = ckpt_path
-        if os.path.exists(ckpt_to_use):
-            state = torch.load(ckpt_to_use, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
-            return model
-        return model
-    def _load_vit(self) -> nn.Module:
         strategy = os.getenv("MODEL_LOAD_STRATEGY", "state_dict")
         ckpt_path = os.getenv("VIT_CHECKPOINT", "models/exports/vit_outfit_model.pth")
-        model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
         if strategy == "random":
-            return model
         # Try to download from Hugging Face Hub first
         try:
@@ -91,32 +128,66 @@ class InferenceService:
                 local_dir_use_symlinks=False
             )
             print(f"📥 Downloaded ViT from HF Hub: {hf_path}")
             state = torch.load(hf_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
-            return model
         except Exception as e:
             print(f"❌ Failed to download ViT from HF Hub: {e}")
-            print("⚠️ WARNING: Using untrained ViT model!")
-            print("🚨 Recommendations will not be meaningful without trained weights!")
-        # Fallback to local checkpoints
         best_path = os.path.join(os.path.dirname(ckpt_path), "vit_outfit_model_best.pth")
-        ckpt_to_use = best_path if os.path.exists(best_path) else ckpt_path
-        if os.path.exists(ckpt_to_use):
-            state = torch.load(ckpt_to_use, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
-            return model
-        return model
     def reload_models(self) -> None:
         """Reload weights from current checkpoint locations (used after background training)."""
-        self.resnet = self._load_resnet().to(self.device).eval()
-        self.vit = self._load_vit().to(self.device).eval()
         for m in [self.resnet, self.vit]:
-            for p in m.parameters():
-                p.requires_grad_(False)
     @torch.inference_mode()
     def embed_images(self, images: List[Image.Image]) -> List[np.ndarray]:
@@ -132,6 +203,16 @@ class InferenceService:
     @torch.inference_mode()
     def compose_outfits(self, items: List[Dict[str, Any]], context: Dict[str, Any]) -> List[Dict[str, Any]]:
         # 1) Ensure embeddings for each input item
         proc_items: List[Dict[str, Any]] = []
         for it in items:
@@ -248,5 +329,15 @@ class InferenceService:
             for subset, score in topk
         ]
         return results

         self.embed_dim = int(os.getenv("EMBED_DIM", "512"))
         self.resnet_version = "resnet_v1"
         self.vit_version = "vit_v1"
+        # Model loading status tracking
+        self.models_loaded = False
+        self.model_errors = []
+        # Load models with validation
+        self.resnet, self.resnet_loaded = self._load_resnet()
+        self.vit, self.vit_loaded = self._load_vit()
+        # Move to device and set eval mode
+        if self.resnet_loaded:
+            self.resnet = self.resnet.to(self.device).eval()
+        if self.vit_loaded:
+            self.vit = self.vit.to(self.device).eval()
+        # Disable gradients
         for m in [self.resnet, self.vit]:
+            if m is not None:
+                for p in m.parameters():
+                    p.requires_grad_(False)
+        # Update overall status
+        self.models_loaded = self.resnet_loaded and self.vit_loaded
+        if not self.models_loaded:
+            self.model_errors = []
+            if not self.resnet_loaded:
+                self.model_errors.append("ResNet: No trained weights found")
+            if not self.vit_loaded:
+                self.model_errors.append("ViT: No trained weights found")
+    def _load_resnet(self) -> tuple[nn.Module, bool]:
         strategy = os.getenv("MODEL_LOAD_STRATEGY", "state_dict")
         ckpt_path = os.getenv("RESNET_CHECKPOINT", "models/exports/resnet_item_embedder.pth")
         if strategy == "random":
+            print("⚠️ Random strategy selected - no trained weights will be loaded!")
+            return ResNetItemEmbedder(embedding_dim=self.embed_dim), False
         # Try to download from Hugging Face Hub first
         try:
                 local_dir_use_symlinks=False
             )
             print(f"📥 Downloaded ResNet from HF Hub: {hf_path}")
+            model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
             state = torch.load(hf_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
+            print("✅ ResNet model loaded successfully from HF Hub")
+            return model, True
         except Exception as e:
             print(f"❌ Failed to download ResNet from HF Hub: {e}")
+        # Check for local best checkpoint first
         best_path = os.path.join(os.path.dirname(ckpt_path), "resnet_item_embedder_best.pth")
         if os.path.exists(best_path):
+            print(f"📁 Loading ResNet from best checkpoint: {best_path}")
+            model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
+            state = torch.load(best_path, map_location="cpu")
+            state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            print("✅ ResNet model loaded successfully from best checkpoint")
+            return model, True
+        # Check for regular checkpoint
+        if os.path.exists(ckpt_path):
+            print(f"📁 Loading ResNet from checkpoint: {ckpt_path}")
+            model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
+            state = torch.load(ckpt_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
+            print("✅ ResNet model loaded successfully from checkpoint")
+            return model, True
+        print("❌ CRITICAL: No trained ResNet weights found!")
+        print("🚨 Cannot provide recommendations without trained weights!")
+        print("💡 Please train the ResNet model first using the training tabs.")
+        return ResNetItemEmbedder(embedding_dim=self.embed_dim), False
+    def _load_vit(self) -> tuple[nn.Module, bool]:
         strategy = os.getenv("MODEL_LOAD_STRATEGY", "state_dict")
         ckpt_path = os.getenv("VIT_CHECKPOINT", "models/exports/vit_outfit_model.pth")
         if strategy == "random":
+            print("⚠️ Random strategy selected - no trained weights will be loaded!")
+            return OutfitCompatibilityModel(embedding_dim=self.embed_dim), False
         # Try to download from Hugging Face Hub first
         try:
                 local_dir_use_symlinks=False
             )
             print(f"📥 Downloaded ViT from HF Hub: {hf_path}")
+            model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
             state = torch.load(hf_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
+            print("✅ ViT model loaded successfully from HF Hub")
+            return model, True
         except Exception as e:
             print(f"❌ Failed to download ViT from HF Hub: {e}")
+        # Check for local best checkpoint first
         best_path = os.path.join(os.path.dirname(ckpt_path), "vit_outfit_model_best.pth")
+        if os.path.exists(best_path):
+            print(f"📁 Loading ViT from best checkpoint: {best_path}")
+            model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
+            state = torch.load(best_path, map_location="cpu")
+            state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            print("✅ ViT model loaded successfully from best checkpoint")
+            return model, True
+        # Check for regular checkpoint
+        if os.path.exists(ckpt_path):
+            print(f"📁 Loading ViT from checkpoint: {ckpt_path}")
+            model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
+            state = torch.load(ckpt_path, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
             model.load_state_dict(state_dict, strict=False)
+            print("✅ ViT model loaded successfully from checkpoint")
+            return model, True
+        print("❌ CRITICAL: No trained ViT weights found!")
+        print("🚨 Cannot provide recommendations without trained weights!")
+        print("💡 Please train the ViT model first using the training tabs.")
+        return OutfitCompatibilityModel(embedding_dim=self.embed_dim), False
     def reload_models(self) -> None:
         """Reload weights from current checkpoint locations (used after background training)."""
+        self.resnet, self.resnet_loaded = self._load_resnet()
+        self.vit, self.vit_loaded = self._load_vit()
+        # Move to device and set eval mode
+        if self.resnet_loaded:
+            self.resnet = self.resnet.to(self.device).eval()
+        if self.vit_loaded:
+            self.vit = self.vit.to(self.device).eval()
+        # Disable gradients
         for m in [self.resnet, self.vit]:
+            if m is not None:
+                for p in m.parameters():
+                    p.requires_grad_(False)
+        # Update overall status
+        self.models_loaded = self.resnet_loaded and self.vit_loaded
+        if not self.models_loaded:
+            self.model_errors = []
+            if not self.resnet_loaded:
+                self.model_errors.append("ResNet: No trained weights found")
+            if not self.vit_loaded:
+                self.model_errors.append("ViT: No trained weights found")
     @torch.inference_mode()
     def embed_images(self, images: List[Image.Image]) -> List[np.ndarray]:
     @torch.inference_mode()
     def compose_outfits(self, items: List[Dict[str, Any]], context: Dict[str, Any]) -> List[Dict[str, Any]]:
+        # Validate that models are properly loaded
+        if not self.models_loaded:
+            error_msg = f"❌ Cannot provide recommendations: Models not properly loaded. Errors: {self.model_errors}"
+            print(error_msg)
+            return [{
+                "error": "Models not trained or loaded properly",
+                "details": self.model_errors,
+                "message": "Please ensure models are trained and checkpoints exist before generating recommendations."
+            }]
         # 1) Ensure embeddings for each input item
         proc_items: List[Dict[str, Any]] = []
         for it in items:
             for subset, score in topk
         ]
         return results
+    def get_model_status(self) -> Dict[str, Any]:
+        """Get current model loading status and errors."""
+        return {
+            "models_loaded": self.models_loaded,
+            "resnet_loaded": self.resnet_loaded,
+            "vit_loaded": self.vit_loaded,
+            "errors": self.model_errors,
+            "can_recommend": self.models_loaded
+        }