Update app.py

app.py CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 import random
 import os
 from PIL import Image
-from ultralytics import YOLO #
+from ultralytics import YOLO # Needed for both person and fashion detection
 from gtts import gTTS
 import uuid
 import time
@@ -14,9 +14,23 @@ import tempfile
 # --- Configuration ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 YOLO_PERSON_MODEL_PATH = 'yolov8n.pt' # Standard YOLOv8 for person detection
-
+YOLO_FASHION_MODEL_PATH = 'best.pt' # <<< Your custom fashion model path
 CLIP_MODEL_NAME = "ViT-B/32"
 
+# Confidence Thresholds
+YOLO_PERSON_CONF_THRESHOLD = 0.4 # Min confidence for detecting a person
+YOLO_FASHION_CONF_THRESHOLD = 0.4 # Min confidence for detecting a fashion item
+YOLO_FASHION_HIGH_CONF_THRESHOLD = 0.6 # Higher threshold to prioritize fashion model item
+
+# --- Define Fashion Model Classes (IMPORTANT: Match these to your 'best.pt' training) ---
+FASHION_CLASSES = {
+    0: 'long sleeve top', 1: 'skirt', 2: 'trousers', 3: 'short sleeve top',
+    4: 'long sleeve outwear', 5: 'short sleeve dress', 6: 'shorts',
+    7: 'vest dress', 8: 'sling dress', 9: 'vest', 10: 'long sleeve dress',
+    11: 'sling', 12: 'short sleeve outwear'
+}
+print(f"Defined {len(FASHION_CLASSES)} fashion categories for {YOLO_FASHION_MODEL_PATH}")
+
 # --- Load Models ---
 print(f"Using device: {DEVICE}")
 try:
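Note: the hand-written FASHION_CLASSES map has to be kept in sync with how 'best.pt' was trained. Ultralytics checkpoints carry their class names in `model.names`, so the map could instead be derived from the file itself. A minimal sketch, assuming 'best.pt' is a standard Ultralytics-trained checkpoint (the hard-coded dict above remains the app's actual source of truth):

```python
# Sketch: derive the class map from the checkpoint instead of hand-maintaining it.
# Assumes 'best.pt' is an Ultralytics checkpoint that stores its class names.
from ultralytics import YOLO

fashion_model = YOLO('best.pt')
FASHION_CLASSES = dict(fashion_model.names)  # e.g. {0: 'long sleeve top', 1: 'skirt', ...}
print(f"Loaded {len(FASHION_CLASSES)} fashion classes from the checkpoint")
```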
@@ -24,15 +38,21 @@ try:
     print(f"CLIP model ({CLIP_MODEL_NAME}) loaded successfully.")
 except Exception as e:
     print(f"Error loading CLIP model: {e}")
-    # Handle error
+    # Handle error or exit if critical
+
 try:
-    yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH).to(DEVICE)
+    yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH) # No .to(DEVICE) needed here for Ultralytics YOLO v8
     print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
 except Exception as e:
     print(f"Error loading YOLO person model: {e}")
-    # Handle error
+    # Handle error or exit if critical
 
-
+try:
+    yolo_fashion_model = YOLO(YOLO_FASHION_MODEL_PATH) # No .to(DEVICE) needed here
+    print(f"YOLO fashion detection model ({YOLO_FASHION_MODEL_PATH}) loaded successfully.")
+except Exception as e:
+    print(f"Error loading YOLO fashion model: {e}")
+    # Handle error or exit if critical - The app might still work with CLIP only
 
 # --- Prompts and Responses ---
 style_prompts = {
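Note: as written, a failed load leaves `yolo_fashion_model` undefined, and the later NameError is only caught by the broad `except` inside `analyze_outfit`. A sketch of a more deliberate CLIP-only fallback, mirroring the app's names (the guard shown at the end is illustrative):

```python
# Sketch: default the model to None so the CLIP-only fallback is explicit,
# not an accident of a swallowed NameError. Constants mirror the app above.
from ultralytics import YOLO

YOLO_FASHION_MODEL_PATH = 'best.pt'  # same constant as in the app

yolo_fashion_model = None
try:
    yolo_fashion_model = YOLO(YOLO_FASHION_MODEL_PATH)
except Exception as e:
    print(f"Error loading YOLO fashion model: {e} - continuing with CLIP only")

# later, guard before use:
# if yolo_fashion_model is not None:
#     fashion_results = yolo_fashion_model(cropped_img, verbose=False)
```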
@@ -50,7 +70,7 @@ style_prompts = {
     ]
 }
 
-#
+# Clothing prompts for CLIP (still useful as fallback and general context)
 clothing_prompts = [
     "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
     "dress", "skirt", "pants", "jeans", "trousers", "shorts",
@@ -58,16 +78,15 @@ clothing_prompts = [
     "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
 ]
 
-#
+# Combine all prompts for CLIP
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
-
-# Record end of style prompts before adding clothing prompts
-style_prompts_end_index = len(all_prompts)
+style_prompts_end_index = len(all_prompts) # Mark where style prompts end
 all_prompts.extend(clothing_prompts)
 print(f"Total prompts for CLIP: {len(all_prompts)}")
 
+# Response Templates (Added a more generic 'trash' option)
 response_templates = {
     'drippy': [
         "You're Drippy, bruh – fire {item}!", "{item} goes crazy, on god!", "Certified drippy with that {item}."
@@ -79,21 +98,33 @@ response_templates = {
     'not_drippy': [
         "Bro thought that {item} was tuff!", "Oh hell nah! Burn that {item}!",
         "Crimes against fashion, especially that {item}! Also… maybe get a haircut.",
-        "Never walk out the house again with that {item}."
+        "Never walk out the house again with that {item}.",
+        "Your drip is trash, try again.", # Generic trash response
+        "This ain't it chief. The overall style needs work." # Another generic one
     ]
 }
 CATEGORY_LABEL_MAP = { "drippy": "drippy", "mid": "mid", "not_drippy": "trash" }
 
-# ---
-def
+# --- Helper Functions ---
+def get_top_clip_clothing(probs, n=1):
     """Gets the top N clothing items based on CLIP probabilities."""
     clothing_probs_start_index = style_prompts_end_index
     clothing_probs = probs[clothing_probs_start_index:]
     actual_n = min(n, len(clothing_prompts))
     if actual_n <= 0:
-        return [
+        return [] # Return empty list if no clothing prompts
+
+    # Get indices and probabilities of top N items within the clothing slice
     top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
-
+    # Convert back to indices in the original all_probs array
+    top_global_indices = [idx + clothing_probs_start_index for idx in top_indices_in_slice]
+
+    # Return list of tuples: (item_name, probability)
+    top_items_with_probs = [
+        (clothing_prompts[i], clothing_probs[i])
+        for i in reversed(top_indices_in_slice) # Get highest prob first
+    ]
+    return top_items_with_probs
 
 # --- Core Logic ---
 def analyze_outfit(input_img: Image.Image):
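Note: `get_top_clip_clothing` relies on the convention that the style prompts occupy the front of `all_prompts`, so the clothing scores are the tail slice starting at `style_prompts_end_index`. A toy run of that slicing with made-up numbers:

```python
# Toy illustration of the prompt-slicing convention (probabilities are invented).
import numpy as np

clothing_prompts = ["t-shirt", "jeans", "hat"]
style_prompts_end_index = 2                       # style prompts sit at indices 0..1
probs = np.array([0.30, 0.10, 0.05, 0.45, 0.10])  # softmax over all 5 prompts

clothing_probs = probs[style_prompts_end_index:]  # [0.05, 0.45, 0.10]
top = np.argsort(clothing_probs)[-1:]             # ascending sort, so last index is best
print(clothing_prompts[top[0]], clothing_probs[top[0]])  # jeans 0.45
```

Also worth noting: `top_global_indices` in the helper is computed but never used; the result tuples are built directly from the slice indices.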
@@ -101,30 +132,72 @@ def analyze_outfit(input_img: Image.Image):
         return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
                 None, "Error: No image provided.")
 
-    img = input_img.copy()
+    img = input_img.convert("RGB").copy() # Ensure image is in RGB
+
     # 1) YOLO Person Detection
-    person_results = yolo_person_model(img, verbose=False)
+    person_results = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)
     boxes = person_results[0].boxes.xyxy.cpu().numpy()
     classes = person_results[0].boxes.cls.cpu().numpy()
     confidences = person_results[0].boxes.conf.cpu().numpy()
+
+    # Filter for persons (class 0 in standard YOLOv8)
     person_indices = np.where(classes == 0)[0]
-    cropped_img = img
+    cropped_img = img # Default to full image if no person found
+    person_detected = False
+
     if len(person_indices) > 0:
+        # Find the person detection with the highest confidence
        max_conf_person_idx = person_indices[np.argmax(confidences[person_indices])]
         x1, y1, x2, y2 = map(int, boxes[max_conf_person_idx])
+        # Ensure coordinates are valid and within image bounds
         x1, y1 = max(0, x1), max(0, y1)
         x2, y2 = min(img.width, x2), min(img.height, y2)
-
-
-
+
+        if x1 < x2 and y1 < y2: # Check if the box has valid dimensions
+            cropped_img = img.crop((x1, y1, x2, y2))
+            print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
+            person_detected = True
         else:
             print("Warning: Invalid person bounding box after clipping. Using full image.")
             cropped_img = img
     else:
         print("No person detected by yolo_person_model. Analyzing full image.")
 
-    # 2)
-
+    # 2) YOLO Fashion Model Detection (run on the cropped image if person was found)
+    detected_fashion_item_name = None
+    detected_fashion_item_conf = 0.0
+    if person_detected or True: # Or always run on the (potentially full) image? Let's always run for now.
+        try:
+            fashion_results = yolo_fashion_model(cropped_img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)
+            fashion_boxes = fashion_results[0].boxes.xyxy.cpu().numpy()
+            fashion_classes = fashion_results[0].boxes.cls.cpu().numpy().astype(int)
+            fashion_confidences = fashion_results[0].boxes.conf.cpu().numpy()
+
+            if len(fashion_classes) > 0:
+                # Find the detection with the highest confidence
+                best_fashion_idx = np.argmax(fashion_confidences)
+                detected_class_id = fashion_classes[best_fashion_idx]
+                detected_fashion_item_conf = fashion_confidences[best_fashion_idx]
+
+                if detected_class_id in FASHION_CLASSES:
+                    detected_fashion_item_name = FASHION_CLASSES[detected_class_id]
+                    print(f"Fashion model detected: '{detected_fashion_item_name}' "
+                          f"with confidence {detected_fashion_item_conf:.2f}")
+                else:
+                    print(f"Warning: Detected fashion class ID {detected_class_id} not in FASHION_CLASSES map.")
+            else:
+                print("No fashion items detected above threshold by yolo_fashion_model.")
+
+        except Exception as e:
+            print(f"Error during YOLO fashion model analysis: {e}")
+            # Continue without fashion model input
+
+    # 3) CLIP Analysis (always run on the cropped/full image)
+    clip_detected_item = "look" # Default fallback item name
+    clip_detected_item_prob = 0.0
+    category_key = 'mid' # Default category
+    final_score_str = "N/A"
+
     try:
         image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
         text_tokens = clip.tokenize(all_prompts).to(DEVICE)
@@ -133,12 +206,14 @@ def analyze_outfit(input_img: Image.Image):
         logits, _ = clip_model(image_tensor, text_tokens)
         all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
 
+        # Calculate style scores
         drip_len = len(style_prompts['drippy'])
         mid_len = len(style_prompts['mid'])
         drip_score = np.mean(all_probs[0 : drip_len])
         mid_score = np.mean(all_probs[drip_len : drip_len + mid_len])
         not_score = np.mean(all_probs[drip_len + mid_len : style_prompts_end_index])
 
+        # Determine overall style category
         if drip_score > mid_score and drip_score > not_score:
             category_key = 'drippy'
             final_score = drip_score
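Note: the category decision averages CLIP's softmax probabilities within each style-prompt group and picks the largest mean. A tiny worked example with invented numbers (two prompts per category for brevity):

```python
# Worked example of the per-category mean scoring; probabilities are invented.
import numpy as np

drip_len, mid_len = 2, 2
style_prompts_end_index = 6
all_probs = np.array([0.30, 0.20, 0.15, 0.10, 0.15, 0.10])

drip_score = np.mean(all_probs[0:drip_len])                                  # 0.250
mid_score = np.mean(all_probs[drip_len:drip_len + mid_len])                  # 0.125
not_score = np.mean(all_probs[drip_len + mid_len:style_prompts_end_index])   # 0.125
# drip_score is strictly largest, so category_key would be 'drippy'.
```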
@@ -153,29 +228,80 @@ def analyze_outfit(input_img: Image.Image):
         final_score_str = f"{final_score:.2f}"
         print(f"Style analysis: Category={category_label}, Score={final_score_str}")
 
-
-
-
-
+        # Get top clothing item from CLIP
+        top_clip_items = get_top_clip_clothing(all_probs, n=1)
+        if top_clip_items:
+            clip_detected_item, clip_detected_item_prob = top_clip_items[0]
+            print(f"Top clothing item identified by CLIP: '{clip_detected_item}' "
+                  f"with probability {clip_detected_item_prob:.2f}")
         else:
             print("Warning: CLIP did not identify a top clothing item.")
-
+            clip_detected_item = "fit" # Use a different fallback if CLIP fails
 
     except Exception as e:
-        print(f"Error during CLIP analysis: {e}")
-
+        print(f"Error during CLIP analysis: {e}")
+        # Use defaults, maybe return error message?
+        return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
                 None, f"Analysis Error: {e}")
 
-    #
+    # 4) Determine the Final Item to Mention in Response
+    final_clothing_item = "style" # Ultimate fallback generic term
+    generic_response_needed = False
+
+    if detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_HIGH_CONF_THRESHOLD:
+        # Priority 1: High-confidence fashion model detection
+        final_clothing_item = detected_fashion_item_name
+        print(f"Using highly confident fashion model item: '{final_clothing_item}'")
+    elif detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_CONF_THRESHOLD:
+        # Priority 2: Medium-confidence fashion model detection (still prefer over CLIP)
+        final_clothing_item = detected_fashion_item_name
+        print(f"Using medium confidence fashion model item: '{final_clothing_item}'")
+    elif clip_detected_item and clip_detected_item_prob > 0.05: # Check if CLIP prob is somewhat reasonable
+        # Priority 3: CLIP detection (if fashion model didn't provide a strong candidate)
+        final_clothing_item = clip_detected_item
+        print(f"Using CLIP detected item: '{final_clothing_item}'")
+    else:
+        # Priority 4: Generic response needed (no confident detection from either model)
+        final_clothing_item = random.choice(["fit", "look", "style", "vibe"]) # Randomize generic term
+        generic_response_needed = True
+        print(f"Using generic fallback item: '{final_clothing_item}'")
+
+
+    # 5) Generate Response and TTS
     try:
-
+        response_pool = response_templates[category_key]
+
+        # If generic response is needed OR category is trash, potentially use more generic templates
+        if generic_response_needed or category_key == 'not_drippy':
+            # Give higher chance to generic trash responses if category is 'not_drippy'
+            if category_key == 'not_drippy':
+                # Mix specific item templates with generic ones
+                specific_templates = [t for t in response_pool if '{item}' in t]
+                generic_templates = [t for t in response_pool if '{item}' not in t]
+                # e.g., 70% chance generic, 30% chance specific item mention (even if generic item name)
+                if random.random() < 0.7 or generic_response_needed:
+                    chosen_template = random.choice(generic_templates if generic_templates else response_pool)
+                else:
+                    chosen_template = random.choice(specific_templates if specific_templates else response_pool)
+            else: # Mid or Drippy, but generic needed
+                chosen_template = random.choice([t for t in response_pool if '{item}' in t] if not generic_response_needed else response_pool)
+
+        else: # Drippy or Mid, and we have a specific item
+            chosen_template = random.choice([t for t in response_pool if '{item}' in t])
+
+        # Format the response, substituting the determined item name
+        # Handle cases where the chosen template might be generic and doesn't have {item}
+        if '{item}' in chosen_template:
+            response_text = chosen_template.format(item=final_clothing_item)
+        else:
+            response_text = chosen_template # Use the generic template as is
+
         tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
         tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
         tts.save(tts_path)
         print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
 
         # --- Updated HTML Output ---
-        # Simpler structure, relies more on CSS for styling defined below
         category_html = f"""
         <div class='results-container'>
             <h2 class='result-category'>RATING: {category_label.upper()}</h2>
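Note: step 4 is a straight priority cascade: fashion model first, CLIP second, generic term last. Since priorities 1 and 2 in the app differ only in their log message, they collapse into a single threshold check in this pure-function sketch (names mirror the app's config):

```python
# Sketch of the step-4 item-selection cascade as a pure function.
import random

def pick_item(fashion_name, fashion_conf, clip_item, clip_prob,
              fashion_thresh=0.4, clip_floor=0.05):
    """Returns (item, generic_response_needed)."""
    if fashion_name and fashion_conf >= fashion_thresh:
        return fashion_name, False          # fashion model wins
    if clip_item and clip_prob > clip_floor:
        return clip_item, False             # CLIP fallback
    return random.choice(["fit", "look", "style", "vibe"]), True  # generic term

print(pick_item("skirt", 0.72, "dress", 0.30))  # ('skirt', False)
print(pick_item(None, 0.0, "hoodie", 0.02))     # (random generic term, True)
```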
@@ -192,205 +318,57 @@ def analyze_outfit(input_img: Image.Image):
             <p class='result-score' style='color: #FFAAAA;'>Error generating audio/full response.</p>
         </div>
         """
+        # Still provide category info, but indicate TTS/response error
         return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
 
 
-# ---
-custom_css = """
-:root {
-    --primary-bg-color: #000000;
-    --secondary-bg-color: #1A1A1A;
-    --text-color: #FFFFFF;
-    --accent-color: #1F04FF;
-    --border-color: #333333; /* Slightly lighter than secondary bg for subtle definition */
-    --input-bg-color: #1A1A1A;
-    --button-text-color: #FFFFFF;
-    --body-text-size: 16px; /* Base text size */
-}
-
-/* --- Global Styles --- */
-body, .gradio-container {
-    background-color: var(--primary-bg-color) !important;
-    color: var(--text-color) !important;
-    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; /* Modern font stack */
-    font-size: var(--body-text-size);
-}
-
-/* Hide default Gradio footer */
-footer { display: none !important; }
-
-/* --- Component Styling --- */
-.gr-block { /* General block container */
-    background-color: var(--secondary-bg-color) !important;
-    border: 1px solid var(--border-color) !important;
-    border-radius: 8px !important; /* Slightly rounded corners */
-    padding: 15px !important;
-    box-shadow: none !important; /* Remove default shadows */
-}
-
-/* Input/Output Text Areas & General inputs */
-.gr-input, .gr-output, .gr-textbox textarea, .gr-dropdown select, .gr-checkboxgroup input {
-    background-color: var(--input-bg-color) !important;
-    color: var(--text-color) !important;
-    border: 1px solid var(--border-color) !important;
-    border-radius: 5px !important;
-}
-.gr-textbox textarea::placeholder { /* Style placeholder text if needed */
-    color: #888888 !important;
-}
-
-/* Component Labels */
-.gr-label span, .gr-label .label-text {
-    color: var(--text-color) !important;
-    font-weight: 500 !important; /* Slightly bolder labels */
-    font-size: 0.95em !important;
-    margin-bottom: 8px !important; /* Space below label */
-}
-
-/* Image Input/Output */
-.gr-image {
-    background-color: var(--primary-bg-color) !important; /* Match main background */
-    border: 1px dashed var(--border-color) !important; /* Dashed border for drop zone */
-    border-radius: 8px !important;
-    overflow: hidden; /* Ensure image stays within bounds */
-}
-.gr-image img {
-    border-radius: 6px !important; /* Slightly round image corners */
-    object-fit: contain; /* Ensure image fits well */
-}
-.gr-image .no-image, .gr-image .upload-button { /* Placeholder text/button inside image component */
-    color: #AAAAAA !important;
-}
-
-/* Audio Component */
-.gr-audio > div:first-of-type { /* Target the container around the audio player */
-    border: 1px solid var(--border-color) !important;
-    background-color: var(--secondary-bg-color) !important;
-    border-radius: 5px !important;
-    padding: 10px !important;
-}
-.gr-audio audio { /* Style the audio player itself */
-    width: 100%; /* Make player responsive */
-    filter: invert(1) hue-rotate(180deg); /* Basic dark theme for player controls */
-}
-
-/* --- Button Styling --- */
-.gr-button { /* General button style reset */
-    border: none !important;
-    border-radius: 5px !important;
-    transition: background-color 0.2s ease, transform 0.1s ease;
-    font-weight: 600 !important;
-}
-.gr-button-primary { /* Specific styling for the primary Analyze button */
-    background-color: var(--accent-color) !important;
-    color: var(--button-text-color) !important;
-    font-size: 1.1em !important; /* Make primary button slightly larger */
-    padding: 12px 20px !important; /* Adjust padding */
-}
-.gr-button-primary:hover {
-    background-color: #482FFF !important; /* Slightly lighter blue on hover */
-    transform: scale(1.02); /* Subtle scale effect */
-    box-shadow: 0 0 10px var(--accent-color); /* Add a glow effect */
-}
-.gr-button-primary:active {
-    transform: scale(0.98); /* Press down effect */
-}
-
-/* --- Typography & Content --- */
-h1, h2, h3 {
-    color: var(--text-color) !important;
-    font-weight: 600; /* Bold headings */
-    letter-spacing: 0.5px; /* Add slight letter spacing */
-}
-.prose h1 { /* Target Markdown H1 specifically if needed */
-    text-align: center;
-    margin-bottom: 25px !important;
-    font-size: 2em !important; /* Larger title */
-    text-transform: uppercase; /* Uppercase for impact */
-    letter-spacing: 1.5px;
-}
-.prose p { /* Target Markdown Paragraph */
-    color: #CCCCCC !important; /* Slightly dimmer text for descriptions */
-    font-size: 0.95em;
-    text-align: center;
-}
-
-/* Custom styling for the results HTML block */
-.results-container {
-    text-align: center;
-    padding: 20px;
-    border: 1px solid var(--accent-color); /* Use accent color for border */
-    border-radius: 8px;
-    background: linear-gradient(145deg, var(--secondary-bg-color), #2a2a2a); /* Subtle gradient */
-}
-.result-category {
-    color: var(--accent-color) !important; /* Use accent color for category */
-    font-size: 1.5em;
-    margin-bottom: 5px;
-    font-weight: 700;
-    text-transform: uppercase;
-}
-.result-score {
-    color: var(--text-color) !important;
-    font-size: 1.1em;
-    margin-top: 0;
-}
-
-/* --- Layout Adjustments --- */
-.gradio-container {
-    max-width: 850px !important; /* Slightly wider max-width */
-    margin: auto !important;
-    padding-top: 30px; /* Add some space at the top */
-}
-.gr-row {
-    gap: 25px !important; /* Increase gap between columns */
-}
-"""
+# --- Elite Fashion / Techno CSS (Keep your existing CSS) ---
+custom_css = """:root { --primary-bg-color: #000000; --secondary-bg-color: #1A1A1A; --text-color: #FFFFFF; --accent-color: #1F04FF; --border-color: #333333; --input-bg-color: #1A1A1A; --button-text-color: #FFFFFF; --body-text-size: 16px; } body, .gradio-container { background-color: var(--primary-bg-color) !important; color: var(--text-color) !important; font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; font-size: var(--body-text-size); } footer { display: none !important; } .gr-block { background-color: var(--secondary-bg-color) !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; padding: 15px !important; box-shadow: none !important; } .gr-input, .gr-output, .gr-textbox textarea, .gr-dropdown select, .gr-checkboxgroup input { background-color: var(--input-bg-color) !important; color: var(--text-color) !important; border: 1px solid var(--border-color) !important; border-radius: 5px !important; } .gr-textbox textarea::placeholder { color: #888888 !important; } .gr-label span, .gr-label .label-text { color: var(--text-color) !important; font-weight: 500 !important; font-size: 0.95em !important; margin-bottom: 8px !important; } .gr-image { background-color: var(--primary-bg-color) !important; border: 1px dashed var(--border-color) !important; border-radius: 8px !important; overflow: hidden; } .gr-image img { border-radius: 6px !important; object-fit: contain; } .gr-image .no-image, .gr-image .upload-button { color: #AAAAAA !important; } .gr-audio > div:first-of-type { border: 1px solid var(--border-color) !important; background-color: var(--secondary-bg-color) !important; border-radius: 5px !important; padding: 10px !important; } .gr-audio audio { width: 100%; filter: invert(1) hue-rotate(180deg); } .gr-button { border: none !important; border-radius: 5px !important; transition: background-color 0.2s ease, transform 0.1s ease; font-weight: 600 !important; } .gr-button-primary { background-color: var(--accent-color) !important; color: var(--button-text-color) !important; font-size: 1.1em !important; padding: 12px 20px !important; } .gr-button-primary:hover { background-color: #482FFF !important; transform: scale(1.02); box-shadow: 0 0 10px var(--accent-color); } .gr-button-primary:active { transform: scale(0.98); } h1, h2, h3 { color: var(--text-color) !important; font-weight: 600; letter-spacing: 0.5px; } .prose h1 { text-align: center; margin-bottom: 25px !important; font-size: 2em !important; text-transform: uppercase; letter-spacing: 1.5px; } .prose p { color: #CCCCCC !important; font-size: 0.95em; text-align: center; } .results-container { text-align: center; padding: 20px; border: 1px solid var(--accent-color); border-radius: 8px; background: linear-gradient(145deg, var(--secondary-bg-color), #2a2a2a); } .result-category { color: var(--accent-color) !important; font-size: 1.5em; margin-bottom: 5px; font-weight: 700; text-transform: uppercase; } .result-score { color: var(--text-color) !important; font-size: 1.1em; margin-top: 0; } .gradio-container { max-width: 850px !important; margin: auto !important; padding-top: 30px; } .gr-row { gap: 25px !important; }"""
 
-
-# --- Gradio Interface (Now using the custom CSS) ---
-with gr.Blocks(css=custom_css, theme=gr.themes.Base(primary_hue="neutral", secondary_hue="neutral", text_size=gr.themes.sizes.text_lg)) as demo: # Use Base theme to minimize default styles
-    # Title using Markdown (styled by CSS)
+# --- Gradio Interface (Using the custom CSS) ---
+with gr.Blocks(css=custom_css, theme=gr.themes.Base(primary_hue="neutral", secondary_hue="neutral", text_size=gr.themes.sizes.text_lg)) as demo:
     gr.Markdown("<h1>💧 DripAI: Rate Your Fit 💧</h1>")
-
     with gr.Row():
-        with gr.Column(scale=1, min_width=350):
+        with gr.Column(scale=1, min_width=350):
             input_image = gr.Image(
                 type='pil',
-                label="Upload Your Outfit",
+                label="Upload Your Outfit",
                 sources=['upload', 'webcam', 'clipboard'],
-                height=450
+                height=450
             )
             analyze_button = gr.Button(
                 "Analyze Outfit",
                 variant="primary",
-                # size="lg" removed, controlled by CSS
             )
-
-
-        gr.
-        category_html = gr.HTML(label="Rating & Score") # Label for screen readers/context
+        with gr.Column(scale=1, min_width=350):
+            gr.Markdown("### ANALYSIS RESULTS")
+            category_html = gr.HTML(label="Rating & Score")
             response_box = gr.Textbox(
                 lines=3,
-                label="Verbal Feedback",
+                label="Verbal Feedback",
                 interactive=False
             )
             audio_output = gr.Audio(
-                autoplay=True, #
+                autoplay=True, # Autoplay the generated audio feedback
                 label="Audio Feedback",
-                streaming=False
+                streaming=False # Keep streaming off for pre-recorded TTS
             )
 
-    # Bind the analysis function to the button click
     analyze_button.click(
         fn=analyze_outfit,
         inputs=[input_image],
         outputs=[category_html, audio_output, response_box]
     )
-
-    # Footer description text
-    gr.Markdown("<p>Upload, paste, or use your webcam to capture your outfit. DripAI evaluates your style.</p>")
+    gr.Markdown("<p>Upload, paste, or use your webcam to capture your outfit. DripAI evaluates your style using multiple AI models.</p>")
 
 # --- Launch App ---
 if __name__ == "__main__":
-    demo.launch()
+    # Make sure 'best.pt' is in the same directory or provide the full path
+    if not os.path.exists(YOLO_FASHION_MODEL_PATH):
+        print(f"\n{'='*20} WARNING {'='*20}")
+        print(f"Fashion model file '{YOLO_FASHION_MODEL_PATH}' not found!")
+        print(f"The app will run but fashion item detection will be skipped.")
+        print(f"{'='*50}\n")
+        # Optionally, you could disable the fashion model part entirely here
+        # or raise an error if it's critical.
+
+    demo.launch(debug=True) # Set debug=False for deployment
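Note: a quick way to sanity-check the new pipeline without the Gradio UI is to call `analyze_outfit` directly. A smoke-test sketch, assuming the module is saved as app.py, an 'outfit.jpg' exists locally, and the success path returns (html, audio_path, text) to match the click() outputs above:

```python
# Smoke-test sketch: exercise analyze_outfit outside the Gradio UI.
from PIL import Image
from app import analyze_outfit  # assumes the module above is app.py

html, audio_path, text = analyze_outfit(Image.open("outfit.jpg"))
print(text)        # e.g. the chosen response template, or an error message
print(audio_path)  # path to the generated mp3, or None on error
```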