Spaces:

dzmu
/

DripAI2Test

Running

App Files Community

dzmu committed on Apr 23, 2025

Commit

813214c

verified ·

1 Parent(s): 9c1e7e7

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -70

app.py CHANGED Viewed

@@ -5,15 +5,13 @@ import numpy as np
 import random
 import os
 from PIL import Image
-from ultralytics import YOLO # Needed for both person and fashion detection
 from gtts import gTTS
 import uuid
 import time
 import tempfile
 from huggingface_hub import hf_hub_download
-#from src.backend import analyze_outfit
-#test
-# --- Configuration ---
 HF_TOKEN = os.environ.get("HF_TOKEN")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -33,11 +31,11 @@ YOLO_FASHION_MODEL_PATH = hf_hub_download(
 CLIP_MODEL_NAME = "ViT-B/32"
 # Confidence Thresholds
-YOLO_PERSON_CONF_THRESHOLD = 0.4      # Min confidence for detecting a person
-YOLO_FASHION_CONF_THRESHOLD = 0.4     # Min confidence for detecting a fashion item
-YOLO_FASHION_HIGH_CONF_THRESHOLD = 0.6 # Higher threshold to prioritize fashion model item
-# --- Define Fashion Model Classes (IMPORTANT: Match these to your 'best.pt' training) ---
 FASHION_CLASSES = {
     0: 'long sleeve top', 1: 'skirt', 2: 'trousers', 3: 'short sleeve top',
     4: 'long sleeve outwear', 5: 'short sleeve dress', 6: 'shorts',
@@ -46,30 +44,27 @@ FASHION_CLASSES = {
 }
 print(f"Defined {len(FASHION_CLASSES)} fashion categories for {YOLO_FASHION_MODEL_PATH}")
-# --- Load Models ---
 print(f"Using device: {DEVICE}")
 try:
     clip_model, clip_preprocess = clip.load(CLIP_MODEL_NAME, device=DEVICE)
     print(f"CLIP model ({CLIP_MODEL_NAME}) loaded successfully.")
 except Exception as e:
     print(f"Error loading CLIP model: {e}")
-    # Handle error or exit if critical
 try:
-    yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH) # No .to(DEVICE) needed here for Ultralytics YOLO v8
     print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
 except Exception as e:
     print(f"Error loading YOLO person model: {e}")
-    # Handle error or exit if critical
 try:
     yolo_fashion_model = YOLO(YOLO_FASHION_MODEL_PATH) # No .to(DEVICE) needed here
     print(f"YOLO fashion detection model ({YOLO_FASHION_MODEL_PATH}) loaded successfully.")
 except Exception as e:
     print(f"Error loading YOLO fashion model: {e}")
-    # Handle error or exit if critical - The app might still work with CLIP only
-# --- Prompts and Responses ---
 style_prompts = {
     'drippy': [
         "avant-garde streetwear", "high-fashion designer outfit", "trendsetting urban attire",
@@ -85,7 +80,7 @@ style_prompts = {
     ]
 }
-# Clothing prompts for CLIP (still useful as fallback and general context)
 clothing_prompts = [
     "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
     "dress", "skirt", "pants", "jeans", "trousers", "shorts",
@@ -93,15 +88,15 @@ clothing_prompts = [
     "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
 ]
-# Combine all prompts for CLIP
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
-style_prompts_end_index = len(all_prompts) # Mark where style prompts end dont mess up loop
 all_prompts.extend(clothing_prompts)
-#print(f"Total prompts for CLIP: {len(all_prompts)}")
-# Response Templates maybe not call out specific item all the time?
 response_templates = {
     'drippy': [
         "You're Drippy, bruh – fire {item}!",
@@ -140,24 +135,24 @@ def format_detected_items(item_list):
         return ""
     return "<p class='result-items'>Detected items: " + ", ".join(item_list) + "</p>"
-# --- Helper Functions ---
 def get_top_clip_clothing(probs, n=1):
     """Gets the top N clothing items based on CLIP probabilities."""
     clothing_probs_start_index = style_prompts_end_index
     clothing_probs = probs[clothing_probs_start_index:]
     actual_n = min(n, len(clothing_prompts))
     if actual_n <= 0:
-        return [] # Return empty list if no clothing prompts
-    # Get indices and probabilities of top N items within the clothing slice
     top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
-    # Convert back to indices in the original all_probs array
     top_global_indices = [idx + clothing_probs_start_index for idx in top_indices_in_slice]
-    # Return list of tuples: (item_name, probability)
     top_items_with_probs = [
         (clothing_prompts[i], clothing_probs[i])
-        for i in reversed(top_indices_in_slice) # Get highest prob first
     ]
     return top_items_with_probs
 def wrapped_analyze(input_img):
@@ -178,7 +173,7 @@ def wrapped_analyze(input_img):
         DEVICE
     )
 def analyze_outfit(input_img):
-    # Handle both file paths and PIL Images
     if isinstance(input_img, str):
         try:
             input_img = Image.open(input_img)
@@ -186,38 +181,32 @@ def analyze_outfit(input_img):
             return (f"<p style='color: #FF5555;'>Error loading image: {str(e)}</p>",
                     None, "Image loading error")
-    # Existing code continues...
     if input_img is None:
         return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
                 None, "Error: No image provided.")
     img = input_img.convert("RGB").copy()
-#def analyze_outfit(image):
-    #if image is None:
-        #return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>", None, "Error: No image provided.")
-    #image = image.convert("RGB").copy()
-    #print(f"[DEBUG] image_path type: {type(image_path)} | value: {image_path}")
-    # 1) YOLO Person Detection
     person_results = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)
     boxes = person_results[0].boxes.xyxy.cpu().numpy()
     classes = person_results[0].boxes.cls.cpu().numpy()
     confidences = person_results[0].boxes.conf.cpu().numpy()
-    # Filter for persons (class 0 in standard YOLOv8)
     person_indices = np.where(classes == 0)[0]
-    cropped_img = img # Default to full image if no person found
     person_detected = False
     if len(person_indices) > 0:
-        # Find the person detection with the highest confidence
         max_conf_person_idx = person_indices[np.argmax(confidences[person_indices])]
         x1, y1, x2, y2 = map(int, boxes[max_conf_person_idx])
-        # Ensure coordinates are valid and within image bounds
         x1, y1 = max(0, x1), max(0, y1)
         x2, y2 = min(img.width, x2), min(img.height, y2)
-        if x1 < x2 and y1 < y2: # Check if the box has valid dimensions
             cropped_img = img.crop((x1, y1, x2, y2))
             print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
             person_detected = True
@@ -227,10 +216,10 @@ def analyze_outfit(input_img):
     else:
         print("No person detected by yolo_person_model. Analyzing full image.")
-    # 2) YOLO Fashion Model Detection (run on the cropped image if person was found)
     detected_fashion_item_name = None
     detected_fashion_item_conf = 0.0
-    if person_detected or True: # Or always run on the (potentially full) image? Let's always run for now.
         try:
             fashion_results = yolo_fashion_model(cropped_img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)
             fashion_boxes = fashion_results[0].boxes.xyxy.cpu().numpy()
@@ -238,7 +227,7 @@ def analyze_outfit(input_img):
             fashion_confidences = fashion_results[0].boxes.conf.cpu().numpy()
             if len(fashion_classes) > 0:
-                # Find the detection with the highest confidence
                 best_fashion_idx = np.argmax(fashion_confidences)
                 detected_class_id = fashion_classes[best_fashion_idx]
                 detected_fashion_item_conf = fashion_confidences[best_fashion_idx]
@@ -256,7 +245,7 @@ def analyze_outfit(input_img):
             print(f"Error during YOLO fashion model analysis: {e}")
             # Continue without fashion model input
-    # 3) CLIP Analysis (always run on the cropped/full image)
     clip_detected_item = "look" # Default fallback item name
     clip_detected_item_prob = 0.0
     category_key = 'mid' # Default category
@@ -270,7 +259,7 @@ def analyze_outfit(input_img):
             logits, _ = clip_model(image_tensor, text_tokens)
             all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
-        # Calculate style scores
         drip_len = len(style_prompts['drippy'])
         mid_len = len(style_prompts['mid'])
         drip_score = np.mean(all_probs[0 : drip_len])
@@ -282,37 +271,35 @@ def analyze_outfit(input_img):
         if drip_score > 0.41 and drip_score > mid_score and drip_score > not_score:
             category_key = 'drippy'
             final_score = drip_score
-            score_label = "Drip Score" # <<< DEFINE score_label
-        elif mid_score > not_score: # Check mid_score > not_score explicitly
             category_key = 'mid'
             final_score = mid_score
-            score_label = "Mid Score" # <<< DEFINE score_label
         else:
             category_key = 'not_drippy'
             final_score = not_score
-            score_label = "Trash Score" # <<< DEFINE score_label # Or maybe "Rating Score"
         category_label = CATEGORY_LABEL_MAP[category_key]
-        # final_score_str = f"{final_score:.2f}" # You might not need this raw score string anymore
         percentage_score = max(0, final_score * 100)
-        percentage_score_str = f"{percentage_score:.0f}%" # Formats as integer (e.g., "3%", "15%", "0%")
-        # Now score_label is defined before being used here
         print(f"Style analysis: Category={category_label}, Score = {score_label}={percentage_score_str} (Raw Score: {final_score:.4f})")
         # Get top clothing item from CLIP
-        top_3_clip_items = get_top_clip_clothing(all_probs, n=3) # <<< Ask for top 3 items
         if top_3_clip_items:
-            # Print the top 3 detected items
             detected_items_str = ", ".join([f"{item[0]} ({item[1]*100:.1f}%)" for item in top_3_clip_items]) # Show item and probability
             print(f"I think I detected: {detected_items_str}")
-            # Still use the single *most* probable item for response generation logic later
             clip_detected_item, clip_detected_item_prob = top_3_clip_items[0]
-            # Optional: You can keep or remove the print for the single top item below if the top-3 print is sufficient
-            # print(f"Top clothing item identified by CLIP (for response): '{clip_detected_item}' "
-            #       f"with probability {clip_detected_item_prob:.2f}")
         else:
              print("I couldn't confidently identify specific clothing items via CLIP.")
              clip_detected_item = "piece" # Use a different fallback if CLIP fails
@@ -320,7 +307,7 @@ def analyze_outfit(input_img):
     except Exception as e:
         print(f"Error during CLIP analysis: {e}")
-        # Use defaults, maybe return error message?
         return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
                 None, f"Analysis Error: {e}")
@@ -347,10 +334,10 @@ def analyze_outfit(input_img):
         print(f"Using generic fallback item: '{final_clothing_item}'")
-    # 5) Generate Response and TTS
     try:
         response_pool = response_templates[category_key]
-        # Choose a random template from the entire response pool
         chosen_template = random.choice(response_pool)
         # Format the response, substituting the item name if needed
@@ -561,10 +548,9 @@ custom_css = """:root {
       gap: 25px !important;
     }"""
-# --- Gradio Interface (Using the custom CSS) ---
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-    #gr.Markdown("<h1 style='text-align: center;'>💧 DripAI: Rate Your Fit 💧</h1>")
-    #gr.Markdown("<p style='text-align: center; font-style: italic;'>AI-powered fashion judgment. Zero mercy.</p>")
     with gr.Row():
         with gr.Column(scale=1):
@@ -575,7 +561,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
                 sources=['upload', 'webcam', 'clipboard'],
                 height=400,
                 show_label=False
-                #webcam_options=gr.WebcamOptions(mirror_webcam=False)
             )
             analyze_button = gr.Button("🔥 Analyze This Drip", variant="primary")
@@ -595,17 +581,14 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
         inputs=[input_image],
         outputs=[category_html, audio_output, response_box]
     )
-    #gr.Markdown("<p>Upload, paste, or use your camera to capture your outfit using the three icons. DripAI evaluates your style using multiple AI models.</p>")
-# --- Launch App ---
 if __name__ == "__main__":
-    # Make sure 'best.pt' is in the same directory or provide the full path
     if not os.path.exists(YOLO_FASHION_MODEL_PATH):
         print(f"\n{'='*20} WARNING {'='*20}")
         print(f"Fashion model file '{YOLO_FASHION_MODEL_PATH}' not found!")
         print(f"The app will run but fashion item detection will be skipped.")
         print(f"{'='*50}\n")
-        # Optionally, you could disable the fashion model part entirely here
-        # or raise an error if it's critical.
-    demo.launch(debug=True, show_error=True) # Set debug=False for deployment

 import random
 import os
 from PIL import Image
+from ultralytics import YOLO
 from gtts import gTTS
 import uuid
 import time
 import tempfile
 from huggingface_hub import hf_hub_download
 HF_TOKEN = os.environ.get("HF_TOKEN")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 CLIP_MODEL_NAME = "ViT-B/32"
 # Confidence Thresholds
+YOLO_PERSON_CONF_THRESHOLD = 0.4
+YOLO_FASHION_CONF_THRESHOLD = 0.4
+YOLO_FASHION_HIGH_CONF_THRESHOLD = 0.6
 FASHION_CLASSES = {
     0: 'long sleeve top', 1: 'skirt', 2: 'trousers', 3: 'short sleeve top',
     4: 'long sleeve outwear', 5: 'short sleeve dress', 6: 'shorts',
 }
 print(f"Defined {len(FASHION_CLASSES)} fashion categories for {YOLO_FASHION_MODEL_PATH}")
 print(f"Using device: {DEVICE}")
 try:
     clip_model, clip_preprocess = clip.load(CLIP_MODEL_NAME, device=DEVICE)
     print(f"CLIP model ({CLIP_MODEL_NAME}) loaded successfully.")
 except Exception as e:
     print(f"Error loading CLIP model: {e}")
 try:
+    yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH)
     print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
 except Exception as e:
     print(f"Error loading YOLO person model: {e}")
 try:
     yolo_fashion_model = YOLO(YOLO_FASHION_MODEL_PATH) # No .to(DEVICE) needed here
     print(f"YOLO fashion detection model ({YOLO_FASHION_MODEL_PATH}) loaded successfully.")
 except Exception as e:
     print(f"Error loading YOLO fashion model: {e}")
 style_prompts = {
     'drippy': [
         "avant-garde streetwear", "high-fashion designer outfit", "trendsetting urban attire",
     ]
 }
 clothing_prompts = [
     "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
     "dress", "skirt", "pants", "jeans", "trousers", "shorts",
     "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
 ]
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
+style_prompts_end_index = len(all_prompts)
 all_prompts.extend(clothing_prompts)
 response_templates = {
     'drippy': [
         "You're Drippy, bruh – fire {item}!",
         return ""
     return "<p class='result-items'>Detected items: " + ", ".join(item_list) + "</p>"
 def get_top_clip_clothing(probs, n=1):
     """Return the top-N clothing prompts ranked by CLIP probability.

     Args:
         probs: 1-D sequence of probabilities aligned with ``all_prompts``
             (style prompts first, clothing prompts after them).
         n: Maximum number of clothing items to return.

     Returns:
         A list of ``(item_name, probability)`` tuples, highest probability
         first. The list is empty when ``n`` is not positive or when no
         clothing probabilities are available.
     """
     start = style_prompts_end_index
     # Bound the slice to the clothing prompts so every index produced by
     # argsort below is guaranteed to be a valid index into clothing_prompts,
     # even if ``probs`` carries extra trailing entries.
     clothing_probs = probs[start:start + len(clothing_prompts)]
     actual_n = min(n, len(clothing_probs))
     if actual_n <= 0:
         return []
     # argsort is ascending: keep the last ``actual_n`` entries and flip them
     # so the most probable item comes first.
     ranked = np.argsort(clothing_probs)[-actual_n:][::-1]
     return [(clothing_prompts[i], clothing_probs[i]) for i in ranked]
 def wrapped_analyze(input_img):
         DEVICE
     )
 def analyze_outfit(input_img):
     if isinstance(input_img, str):
         try:
             input_img = Image.open(input_img)
             return (f"<p style='color: #FF5555;'>Error loading image: {str(e)}</p>",
                     None, "Image loading error")
     if input_img is None:
         return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
                 None, "Error: No image provided.")
     img = input_img.convert("RGB").copy()
     person_results = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)
     boxes = person_results[0].boxes.xyxy.cpu().numpy()
     classes = person_results[0].boxes.cls.cpu().numpy()
     confidences = person_results[0].boxes.conf.cpu().numpy()
     person_indices = np.where(classes == 0)[0]
+    cropped_img = img
     person_detected = False
     if len(person_indices) > 0:
         max_conf_person_idx = person_indices[np.argmax(confidences[person_indices])]
         x1, y1, x2, y2 = map(int, boxes[max_conf_person_idx])
         x1, y1 = max(0, x1), max(0, y1)
         x2, y2 = min(img.width, x2), min(img.height, y2)
+        if x1 < x2 and y1 < y2:
             cropped_img = img.crop((x1, y1, x2, y2))
             print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
             person_detected = True
     else:
         print("No person detected by yolo_person_model. Analyzing full image.")
     detected_fashion_item_name = None
     detected_fashion_item_conf = 0.0
+    if person_detected or True:
         try:
             fashion_results = yolo_fashion_model(cropped_img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)
             fashion_boxes = fashion_results[0].boxes.xyxy.cpu().numpy()
             fashion_confidences = fashion_results[0].boxes.conf.cpu().numpy()
             if len(fashion_classes) > 0:
                 best_fashion_idx = np.argmax(fashion_confidences)
                 detected_class_id = fashion_classes[best_fashion_idx]
                 detected_fashion_item_conf = fashion_confidences[best_fashion_idx]
             print(f"Error during YOLO fashion model analysis: {e}")
             # Continue without fashion model input
     clip_detected_item = "look" # Default fallback item name
     clip_detected_item_prob = 0.0
     category_key = 'mid' # Default category
             logits, _ = clip_model(image_tensor, text_tokens)
             all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
         drip_len = len(style_prompts['drippy'])
         mid_len = len(style_prompts['mid'])
         drip_score = np.mean(all_probs[0 : drip_len])
         if drip_score > 0.41 and drip_score > mid_score and drip_score > not_score:
             category_key = 'drippy'
             final_score = drip_score
+            score_label = "Drip Score"
+        elif mid_score > not_score:
             category_key = 'mid'
             final_score = mid_score
+            score_label = "Mid Score"
         else:
             category_key = 'not_drippy'
             final_score = not_score
+            score_label = "Trash Score"
         category_label = CATEGORY_LABEL_MAP[category_key]
         percentage_score = max(0, final_score * 100)
+        percentage_score_str = f"{percentage_score:.0f}%"
         print(f"Style analysis: Category={category_label}, Score = {score_label}={percentage_score_str} (Raw Score: {final_score:.4f})")
         # Get top clothing item from CLIP
+        top_3_clip_items = get_top_clip_clothing(all_probs, n=3)
         if top_3_clip_items:
             detected_items_str = ", ".join([f"{item[0]} ({item[1]*100:.1f}%)" for item in top_3_clip_items]) # Show item and probability
             print(f"I think I detected: {detected_items_str}")
             clip_detected_item, clip_detected_item_prob = top_3_clip_items[0]
         else:
              print("I couldn't confidently identify specific clothing items via CLIP.")
              clip_detected_item = "piece" # Use a different fallback if CLIP fails
     except Exception as e:
         print(f"Error during CLIP analysis: {e}")
         return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
                 None, f"Analysis Error: {e}")
         print(f"Using generic fallback item: '{final_clothing_item}'")
     try:
         response_pool = response_templates[category_key]
         chosen_template = random.choice(response_pool)
         # Format the response, substituting the item name if needed
       gap: 25px !important;
     }"""
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=1):
                 sources=['upload', 'webcam', 'clipboard'],
                 height=400,
                 show_label=False
             )
             analyze_button = gr.Button("🔥 Analyze This Drip", variant="primary")
         inputs=[input_image],
         outputs=[category_html, audio_output, response_box]
     )
 if __name__ == "__main__":
     if not os.path.exists(YOLO_FASHION_MODEL_PATH):
         print(f"\n{'='*20} WARNING {'='*20}")
         print(f"Fashion model file '{YOLO_FASHION_MODEL_PATH}' not found!")
         print(f"The app will run but fashion item detection will be skipped.")
         print(f"{'='*50}\n")
+    demo.launch(debug=False, show_error=True) # Set debug=False for deployment