Spaces:

MODLI
/

detectionotfit22222

Sleeping

App Files Files Community

MODLI committed on Sep 18, 2025

Commit

c3b9201

verified ·

1 Parent(s): 7404709

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -38

app.py CHANGED Viewed

@@ -10,25 +10,53 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
-# Catégories optimisées
 FASHION_CATEGORIES = [
-    "t-shirt", "dress", "pants", "jacket", "skirt",
-    "shoes", "bag", "swimwear", "lingerie", "sweater",
-    "jeans", "coat", "shorts", "blouse", "hat", "top",
-    "jogging pants", "dress pants", "leggings", "boots",
-    "sandals", "sneakers", "backpack", "glasses"
 ]
 print("🔧 Loading fashion model...")
-# Modèle principal
-fashion_pipe = pipeline(
-    "zero-shot-image-classification",
-    model="openai/clip-vit-base-patch32"
-)
-print("✅ Model loaded successfully!")
 # Configuration API
 API_KEYS = os.environ.get("API_KEYS", "").split(",")
@@ -38,13 +66,30 @@ class ClassificationRequest(BaseModel):
     image_data: str
     api_key: Optional[str] = None
 def load_image_from_url(url):
     """Charge une image depuis une URL de manière robuste"""
     try:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
-        response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
         # Vérifie que c'est bien une image
@@ -52,7 +97,7 @@ def load_image_from_url(url):
             raise ValueError("URL does not point to an image")
         image = Image.open(BytesIO(response.content))
-        return image.convert('RGB')
     except Exception as e:
         raise ValueError(f"❌ Cannot load image from URL: {str(e)}")
@@ -67,46 +112,59 @@ def analyze_fashion_item(image_input, url_input):
                 image = Image.fromarray(image_input)
             else:
                 image = image_input
         elif url_input and url_input.strip():
             # Utilise l'URL
             image = load_image_from_url(url_input.strip())
         else:
             return "❌ Please upload an image or enter a URL first", None
-        # Redimensionnement intelligent
-        width, height = image.size
-        if max(width, height) > 1024:
-            ratio = 1024 / max(width, height)
-            new_size = (int(width * ratio), int(height * ratio))
-            image = image.resize(new_size, Image.Resampling.LANCZOS)
-        # 🔥 ANALYSE PRINCIPALE
-        predictions = fashion_pipe(
-            image,
-            candidate_labels=FASHION_CATEGORIES,
-            hypothesis_template="a clear photo of {}",
-            multi_label=False
-        )
-        # Filtrage des résultats
-        confident_predictions = [p for p in predictions if p['score'] > 0.1]
         if not confident_predictions:
-            return "❌ No confident prediction. Try a clearer image.", image
         best_pred = confident_predictions[0]
         # Formatage des résultats
-        result_text = f"🎯 **Main item**: {best_pred['label']}\n"
         result_text += f"**Confidence**: {best_pred['score']*100:.1f}%\n\n"
         if len(confident_predictions) > 1:
             result_text += "**Other possibilities**:\n"
-            for i, pred in enumerate(confident_predictions[1:4], 1):
-                result_text += f"{i}. {pred['label']} ({pred['score']*100:.1f}%)\n"
-        result_text += f"\n💡 **Tip**: This appears to be {best_pred['label']}. "
-        result_text += "Make sure the item is well-lit and centered."
         return result_text, image
@@ -128,6 +186,9 @@ with gr.Blocks(
     .header { text-align: center; margin-bottom: 30px; }
     .input-section { background: #f8f9fa; padding: 20px; border-radius: 10px; }
     .output-section { background: white; padding: 20px; border-radius: 10px; }
     """
 ) as demo:
@@ -153,6 +214,14 @@ with gr.Blocks(
                     lines=2
                 )
                 analyze_btn = gr.Button(
                     "🔍 Analyze Item",
                     variant="primary",
@@ -178,6 +247,7 @@ with gr.Blocks(
         - Make sure the clothing item is clearly visible
         - Well-lit images work best
         - Avoid busy backgrounds
         """)
     # Événement de click
@@ -210,6 +280,7 @@ async def api_classify(request: ClassificationRequest):
         image_bytes = base64.b64decode(request.image_data)
         image = Image.open(BytesIO(image_bytes))
         # Analyse avec des inputs vides pour URL
         result_text, processed_image = analyze_fashion_item(image, "")

 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
+import torch
+import torchvision.transforms as transforms
+from torchvision.models import resnet50
+import torch.nn as nn
+# Catégories fashion plus détaillées et précises
 FASHION_CATEGORIES = [
+    "t-shirt", "dress", "jeans", "jacket", "skirt",
+    "sneakers", "handbag", "swimsuit", "lingerie", "sweater",
+    "coat", "shorts", "blouse", "hat", "top",
+    "sweatpants", "dress pants", "leggings", "boots",
+    "sandals", "heels", "backpack", "sunglasses", "blazer",
+    "cardigan", "polo shirt", "hoodie", "vest", "jumpsuit",
+    "romper", "crop top", "tank top", "long sleeve shirt",
+    "windbreaker", "parka", "trench coat", "leather jacket",
+    "denim jacket", "waistcoat", "suit", "tie", "scarf",
+    "gloves", "belt", "wallet", "watch", "jewelry"
 ]
 print("🔧 Loading fashion model...")
+# Charger un modèle plus spécialisé pour la mode
+try:
+    # Essayer d'abord un modèle spécialisé fashion
+    fashion_pipe = pipeline(
+        "image-classification",
+        model="nateraw/fashion-clip",
+        device=0 if torch.cuda.is_available() else -1
+    )
+    print("✅ Fashion-CLIP model loaded successfully!")
+except:
+    try:
+        # Fallback sur un modèle plus général mais avec fine-tuning
+        fashion_pipe = pipeline(
+            "zero-shot-image-classification",
+            model="openai/clip-vit-large-patch14",
+            device=0 if torch.cuda.is_available() else -1
+        )
+        print("✅ CLIP Large model loaded successfully!")
+    except:
+        # Dernier recours
+        fashion_pipe = pipeline(
+            "zero-shot-image-classification",
+            model="openai/clip-vit-base-patch32",
+            device=0 if torch.cuda.is_available() else -1
+        )
+        print("✅ CLIP Base model loaded as fallback!")
 # API configuration
 # API_KEYS holds the comma-separated value of the API_KEYS environment variable.
 # NOTE(review): if the variable is unset or empty, "".split(",") returns [""]
 # rather than [] — confirm the key check handles the empty-string entry.
 API_KEYS = os.environ.get("API_KEYS", "").split(",")
     image_data: str
     api_key: Optional[str] = None
+def preprocess_image(image):
+    """Prétraite l'image pour améliorer la détection"""
+    # Conversion en RGB si nécessaire
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    # Redimensionnement intelligent avec maintien des proportions
+    width, height = image.size
+    max_size = 512
+    if max(width, height) > max_size:
+        ratio = max_size / max(width, height)
+        new_size = (int(width * ratio), int(height * ratio))
+        image = image.resize(new_size, Image.Resampling.LANCZOS)
+    return image
 def load_image_from_url(url):
     """Charge une image depuis une URL de manière robuste"""
     try:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
         }
+        response = requests.get(url, headers=headers, timeout=15)
         response.raise_for_status()
         # Vérifie que c'est bien une image
             raise ValueError("URL does not point to an image")
         image = Image.open(BytesIO(response.content))
+        return preprocess_image(image)
     except Exception as e:
         raise ValueError(f"❌ Cannot load image from URL: {str(e)}")
                 image = Image.fromarray(image_input)
             else:
                 image = image_input
+            image = preprocess_image(image)
         elif url_input and url_input.strip():
             # Utilise l'URL
             image = load_image_from_url(url_input.strip())
         else:
             return "❌ Please upload an image or enter a URL first", None
+        # 🔥 ANALYSE PRINCIPALE AVEC PARAMÈTRES OPTIMISÉS
+        try:
+            # Essayer d'abord avec le modèle fashion-clip
+            predictions = fashion_pipe(image)
+            # Si c'est le modèle fashion-clip, adapter le format de réponse
+            if hasattr(fashion_pipe, 'model') and 'fashion-clip' in str(fashion_pipe.model):
+                # Trier par score et formater
+                predictions = sorted(predictions, key=lambda x: x['score'], reverse=True)
+                confident_predictions = [p for p in predictions if p['score'] > 0.05]
+            else:
+                # Pour les modèles zero-shot
+                predictions = fashion_pipe(
+                    image,
+                    candidate_labels=FASHION_CATEGORIES,
+                    hypothesis_template="a clear photo of {}",
+                    multi_label=True
+                )
+                confident_predictions = [p for p in predictions if p['score'] > 0.1]
+        except Exception as model_error:
+            print(f"Model error: {model_error}")
+            return "❌ Model analysis failed. Please try another image.", image
         if not confident_predictions:
+            return "❌ No confident prediction. Try a clearer image with better lighting.", image
+        # Trier par score décroissant
+        confident_predictions.sort(key=lambda x: x['score'], reverse=True)
         best_pred = confident_predictions[0]
         # Formatage des résultats
+        result_text = f"🎯 **Main item**: {best_pred['label'].title()}\n"
         result_text += f"**Confidence**: {best_pred['score']*100:.1f}%\n\n"
         if len(confident_predictions) > 1:
             result_text += "**Other possibilities**:\n"
+            for i, pred in enumerate(confident_predictions[1:6], 1):  # Top 5 seulement
+                result_text += f"{i}. {pred['label'].title()} ({pred['score']*100:.1f}%)\n"
+        # Conseils basés sur la confiance
+        if best_pred['score'] < 0.7:
+            result_text += f"\n💡 **Tip**: Low confidence. Try a clearer image with the item centered and good lighting."
+        else:
+            result_text += f"\n✅ **High confidence detection**: This is very likely a {best_pred['label']}."
         return result_text, image
     .header { text-align: center; margin-bottom: 30px; }
     .input-section { background: #f8f9fa; padding: 20px; border-radius: 10px; }
     .output-section { background: white; padding: 20px; border-radius: 10px; }
+    .success { color: green; }
+    .warning { color: orange; }
+    .error { color: red; }
     """
 ) as demo:
                     lines=2
                 )
+                gr.Markdown("""
+                **📝 Tips for better results:**
+                - Use clear, well-lit images
+                - Center the clothing item
+                - Use plain backgrounds when possible
+                - Avoid multiple items in one image
+                """)
                 analyze_btn = gr.Button(
                     "🔍 Analyze Item",
                     variant="primary",
         - Make sure the clothing item is clearly visible
         - Well-lit images work best
         - Avoid busy backgrounds
+        - For best results, show one item at a time
         """)
     # Événement de click
         image_bytes = base64.b64decode(request.image_data)
         image = Image.open(BytesIO(image_bytes))
+        image = preprocess_image(image)
         # Analyse avec des inputs vides pour URL
         result_text, processed_image = analyze_fashion_item(image, "")