Ali Mohsin committed
Commit 3b3cac8
1 Parent(s): 24ea486
Files changed (2)
  1. app.py +54 -10
  2. utils/data_fetch.py +23 -8
app.py CHANGED
@@ -415,6 +415,9 @@ def gradio_recommend(files: List[str], occasion: str, weather: str, num_outfits:
 
 
 def start_training_advanced(
+    # Dataset size
+    dataset_size: str,
+
     # ResNet parameters
     resnet_epochs: int, resnet_batch_size: int, resnet_lr: float, resnet_optimizer: str,
     resnet_weight_decay: float, resnet_triplet_margin: float, resnet_embedding_dim: int,
@@ -511,10 +514,16 @@ def start_training_advanced(
 
     # Train ResNet with custom parameters
     log_message = f"πŸš€ Starting ResNet training with custom parameters...\n"
+    log_message += f"Dataset Size: {dataset_size} samples\n"
     log_message += f"Backbone: {resnet_backbone}, Embedding Dim: {resnet_embedding_dim}\n"
     log_message += f"Epochs: {resnet_epochs}, Batch Size: {resnet_batch_size}, LR: {resnet_lr}\n"
     log_message += f"Optimizer: {resnet_optimizer}, Triplet Margin: {resnet_triplet_margin}\n"
 
+    # Add dataset size limit if not full
+    dataset_args = []
+    if dataset_size != "full":
+        dataset_args = ["--max_samples", dataset_size]
+
     resnet_cmd = [
         "python", "train_resnet.py",
         "--data_root", DATASET_ROOT,
@@ -525,7 +534,7 @@ def start_training_advanced(
         "--triplet_margin", str(resnet_triplet_margin),
         "--embedding_dim", str(resnet_embedding_dim),
         "--out", os.path.join(export_dir, "resnet_item_embedder_custom.pth")
-    ]
+    ] + dataset_args
 
     if resnet_backbone != "resnet50":
         resnet_cmd.extend(["--backbone", resnet_backbone])
@@ -540,6 +549,7 @@ def start_training_advanced(
 
     # Train ViT with custom parameters
     log_message += f"πŸš€ Starting ViT training with custom parameters...\n"
+    log_message += f"Dataset Size: {dataset_size} samples\n"
     log_message += f"Layers: {vit_num_layers}, Heads: {vit_num_heads}, FF Multiplier: {vit_ff_multiplier}\n"
     log_message += f"Epochs: {vit_epochs}, Batch Size: {vit_batch_size}, LR: {vit_lr}\n"
     log_message += f"Optimizer: {vit_optimizer}, Triplet Margin: {vit_triplet_margin}\n"
@@ -554,7 +564,7 @@ def start_training_advanced(
         "--triplet_margin", str(vit_triplet_margin),
         "--embedding_dim", str(vit_embedding_dim),
         "--export", os.path.join(export_dir, "vit_outfit_model_custom.pth")
-    ]
+    ] + dataset_args
 
     result = subprocess.run(vit_cmd, capture_output=True, text=True, check=False)
 
@@ -593,7 +603,7 @@ def start_training_advanced(
     return log_message
 
 
-def start_training_simple(res_epochs: int, vit_epochs: int):
+def start_training_simple(dataset_size: str, res_epochs: int, vit_epochs: int):
     """Start simple training with basic parameters."""
     log_message = "Starting training..."
     def _runner():
@@ -605,16 +615,21 @@ def start_training_simple(res_epochs: int, vit_epochs: int):
             return
         export_dir = os.getenv("EXPORT_DIR", "models/exports")
         os.makedirs(export_dir, exist_ok=True)
-        log_message = "Training ResNet…\n"
+        log_message = f"Training ResNet on {dataset_size} samples...\n"
+        # Add dataset size limit if not full
+        dataset_args = []
+        if dataset_size != "full":
+            dataset_args = ["--max_samples", dataset_size]
+
         subprocess.run([
             "python", "train_resnet.py", "--data_root", DATASET_ROOT, "--epochs", str(res_epochs),
             "--out", os.path.join(export_dir, "resnet_item_embedder.pth")
-        ], check=False)
-        log_message += "\nTraining ViT (triplet)…\n"
+        ] + dataset_args, check=False)
+        log_message += f"\nTraining ViT (triplet) on {dataset_size} samples...\n"
         subprocess.run([
             "python", "train_vit_triplet.py", "--data_root", DATASET_ROOT, "--epochs", str(vit_epochs),
             "--export", os.path.join(export_dir, "vit_outfit_model.pth")
-        ], check=False)
+        ] + dataset_args, check=False)
         service.reload_models()
         log_message += "\nDone. Artifacts in models/exports."
 
@@ -644,6 +659,7 @@ def start_training_simple(res_epochs: int, vit_epochs: int):
 
 with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendation") as demo:
     gr.Markdown("## πŸ† Dressify – Advanced Outfit Recommendation System\n*Research-grade, self-contained outfit recommendation with comprehensive training controls*")
+    gr.Markdown("πŸ’‘ **Pro Tip**: Start with 2000 samples for quick testing, then increase to 50000+ for production training!")
 
     with gr.Tab("🎨 Recommend"):
         inp2 = gr.Files(label="Upload wardrobe images", file_types=["image"], file_count="multiple")
@@ -660,6 +676,16 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendation") as demo:
         gr.Markdown("### 🎯 Comprehensive Training Parameter Control\nCustomize every aspect of model training for research and experimentation.")
 
         with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("#### πŸ“Š Dataset Size Control")
+                gr.Markdown("Start small for testing, increase for production training")
+                dataset_size = gr.Dropdown(
+                    choices=["2000", "5000", "10000", "25000", "50000", "full"],
+                    value="2000",
+                    label="Training Dataset Size"
+                )
+                gr.Markdown("**2000**: Quick testing (~2-5 min)\n**5000**: Fast validation (~5-10 min)\n**10000**: Good validation (~10-20 min)\n**25000+**: Production training")
+
             with gr.Column(scale=1):
                 gr.Markdown("#### πŸ–ΌοΈ ResNet Item Embedder")
 
@@ -768,6 +794,9 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendation") as demo:
     start_advanced_btn.click(
         fn=start_training_advanced,
         inputs=[
+            # Dataset size
+            dataset_size,
+
             # ResNet parameters
             resnet_epochs, resnet_batch_size, resnet_lr, resnet_optimizer,
             resnet_weight_decay, resnet_triplet_margin, resnet_embedding_dim,
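Editor's note on the hunk above: the wiring depends on Gradio passing component values to the handler positionally, in the order they appear in inputs, which is why dataset_size is listed first to match the new leading parameter of start_training_advanced. A minimal, self-contained sketch of that contract (component names here are illustrative, not taken from the commit):

import gradio as gr

def run(dataset_size: str, res_epochs: int, vit_epochs: int) -> str:
    # Dropdown values arrive as strings ("2000" or "full"); sliders arrive as numbers.
    return f"size={dataset_size}, resnet={res_epochs}, vit={vit_epochs}"

with gr.Blocks() as demo:
    size = gr.Dropdown(choices=["2000", "full"], value="2000", label="Size")
    res = gr.Slider(1, 50, value=10, step=1, label="ResNet epochs")
    vit = gr.Slider(1, 100, value=20, step=1, label="ViT epochs")
    out = gr.Textbox(label="Result")
    # Inputs are matched to run()'s parameters by position, not by name.
    gr.Button("Run").click(fn=run, inputs=[size, res, vit], outputs=out)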
@@ -838,11 +867,26 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendation") as demo:
 
     with gr.Tab("πŸ”§ Simple Training"):
         gr.Markdown("### πŸš€ Quick Training with Default Parameters\nFast training with proven configurations for immediate results.")
-        epochs_res = gr.Slider(1, 50, value=10, step=1, label="ResNet epochs")
-        epochs_vit = gr.Slider(1, 100, value=20, step=1, label="ViT epochs")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("#### πŸ“Š Dataset Size Control")
+                gr.Markdown("Start small for testing, increase for production training")
+                dataset_size = gr.Dropdown(
+                    choices=["2000", "5000", "10000", "25000", "50000", "full"],
+                    value="2000",
+                    label="Training Dataset Size"
+                )
+                gr.Markdown("**2000**: Quick testing (~2-5 min)\n**5000**: Fast validation (~5-10 min)\n**10000**: Good validation (~10-20 min)\n**25000+**: Production training")
+
+            with gr.Column(scale=1):
+                gr.Markdown("#### βš™οΈ Training Parameters")
+                epochs_res = gr.Slider(1, 50, value=10, step=1, label="ResNet epochs")
+                epochs_vit = gr.Slider(1, 100, value=20, step=1, label="ViT epochs")
+
         train_log = gr.Textbox(label="Training Log", lines=10)
         start_btn = gr.Button("Start Training")
-        start_btn.click(fn=start_training_simple, inputs=[epochs_res, epochs_vit], outputs=train_log)
+        start_btn.click(fn=start_training_simple, inputs=[dataset_size, epochs_res, epochs_vit], outputs=train_log)
 
     with gr.Tab("πŸ“Š Embed (Debug)"):
         inp = gr.Files(label="Upload Items (multiple images)")
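Both training commands now append --max_samples <n> whenever a finite size is chosen, but the scripts that consume the flag (train_resnet.py, train_vit_triplet.py) are not part of this commit. A minimal sketch of how such a script could honor it, assuming an argparse CLI and a PyTorch dataset; build_item_dataset is a hypothetical loader, and note that type=int converts the dropdown's string value ("2000", "5000", ...):

import argparse

import torch
from torch.utils.data import Subset

from dataset import build_item_dataset  # hypothetical loader for the data root

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_root", required=True)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--out", required=True)
    # The UI passes the dropdown value as a string; type=int parses it to an integer.
    parser.add_argument("--max_samples", type=int, default=None,
                        help="Optional cap on training samples; omit to use the full set")
    args = parser.parse_args()

    dataset = build_item_dataset(args.data_root)
    if args.max_samples is not None and args.max_samples < len(dataset):
        # Draw a random subset so the cap does not bias toward directory order.
        indices = torch.randperm(len(dataset))[: args.max_samples].tolist()
        dataset = Subset(dataset, indices)
    # ... training loop over `dataset`, checkpoint written to args.out ...

if __name__ == "__main__":
    main()

Under this guard, choosing a numeric size larger than the dataset simply falls through to the full set, so only the "full" sentinel needs special handling on the UI side.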
 
utils/data_fetch.py CHANGED
@@ -170,8 +170,13 @@ def ensure_dataset_ready() -> Optional[str]:
     for meta_file in metadata_files:
         meta_path = os.path.join(root, meta_file)
         if os.path.exists(meta_path):
-            size_mb = os.path.getsize(meta_path) / (1024 * 1024)
-            print(f"πŸ“‹ {meta_file}: {size_mb:.1f} MB")
+            size_bytes = os.path.getsize(meta_path)
+            if size_bytes < 1024 * 1024:  # Less than 1MB
+                size_kb = size_bytes / 1024
+                print(f"πŸ“‹ {meta_file}: {size_kb:.1f} KB")
+            else:
+                size_mb = size_bytes / (1024 * 1024)
+                print(f"πŸ“‹ {meta_file}: {size_mb:.1f} MB")
         else:
             print(f"⚠️ Missing: {meta_file}")
 
@@ -209,10 +214,15 @@ def check_dataset_structure(root: str) -> dict:
     for meta_file in metadata_files:
         meta_path = os.path.join(root, meta_file)
         if os.path.exists(meta_path):
-            size_mb = os.path.getsize(meta_path) / (1024 * 1024)
-            structure["metadata"][meta_file] = {"exists": True, "size_mb": size_mb}
+            size_bytes = os.path.getsize(meta_path)
+            if size_bytes < 1024 * 1024:  # Less than 1MB
+                size_kb = size_bytes / 1024
+                structure["metadata"][meta_file] = {"exists": True, "size_kb": size_kb}
+            else:
+                size_mb = size_bytes / (1024 * 1024)
+                structure["metadata"][meta_file] = {"exists": True, "size_mb": size_mb}
         else:
-            structure["metadata"][meta_file] = {"exists": False, "size_mb": 0}
+            structure["metadata"][meta_file] = {"exists": False, "size_mb": 0, "size_kb": 0}
 
     # Check for splits
     split_locations = [
@@ -229,10 +239,15 @@ def check_dataset_structure(root: str) -> dict:
         for split_file in files:
             split_path = os.path.join(location_path, split_file)
             if os.path.exists(split_path):
-                size_mb = os.path.getsize(split_path) / (1024 * 1024)
-                structure["splits"][location][split_file] = {"exists": True, "size_mb": size_mb}
+                size_bytes = os.path.getsize(split_path)
+                if size_bytes < 1024 * 1024:  # Less than 1MB
+                    size_kb = size_bytes / 1024
+                    structure["splits"][location][split_file] = {"exists": True, "size_kb": size_kb}
+                else:
+                    size_mb = size_bytes / (1024 * 1024)
+                    structure["splits"][location][split_file] = {"exists": True, "size_mb": size_mb}
             else:
-                structure["splits"][location][split_file] = {"exists": False, "size_mb": 0}
+                structure["splits"][location][split_file] = {"exists": False, "size_mb": 0, "size_kb": 0}
     else:
         structure["splits"][location] = "directory_not_found"
 
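The bytes-to-KB/MB branching above now appears three times across utils/data_fetch.py. If a follow-up cleanup were wanted, it could be factored into one helper; a minimal sketch (format_size_entry is a hypothetical name, not part of this commit):

import os

def format_size_entry(path: str) -> dict:
    """Return {"size_kb": ...} for files under 1 MB, else {"size_mb": ...}."""
    size_bytes = os.path.getsize(path)
    if size_bytes < 1024 * 1024:  # Less than 1MB, report in KB
        return {"size_kb": size_bytes / 1024}
    return {"size_mb": size_bytes / (1024 * 1024)}

# Usage inside check_dataset_structure:
#     structure["metadata"][meta_file] = {"exists": True, **format_size_entry(meta_path)}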
 
 