Spaces:

dzmu
/

DripAI2Test

Running

App Files Files Community

dzmu committed on Apr 23, 2025

Commit

fd05be2

verified ·

1 Parent(s): bf3b76a

Rename src/drip_backend.py to src/backend.py

Browse files

Files changed (2) hide show

src/backend.py +206 -0
src/drip_backend.py +0 -0

src/backend.py ADDED Viewed

	@@ -0,0 +1,206 @@

def _crop_to_best_person(img):
    """Return *img* cropped to the most confident person box, or *img* itself.

    Runs the module-level ``yolo_person_model`` on *img*. When no person is
    found, or the clipped box is empty, the full image is returned unchanged.
    """
    detections = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)[0].boxes
    boxes = detections.xyxy.cpu().numpy()
    classes = detections.cls.cpu().numpy()
    confidences = detections.conf.cpu().numpy()

    # Class 0 is treated as "person" (standard YOLOv8 label set, per the
    # original author's note -- confirm if the person model is ever swapped).
    person_indices = np.where(classes == 0)[0]
    if len(person_indices) == 0:
        print("No person detected by yolo_person_model. Analyzing full image.")
        return img

    best_idx = person_indices[np.argmax(confidences[person_indices])]
    x1, y1, x2, y2 = map(int, boxes[best_idx])
    # Clip the box to the image so crop() never receives out-of-range values.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.width, x2), min(img.height, y2)
    if x1 >= x2 or y1 >= y2:
        print("Warning: Invalid person bounding box after clipping. Using full image.")
        return img

    cropped = img.crop((x1, y1, x2, y2))
    print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
    return cropped


def _detect_fashion_item(img):
    """Return ``(name, confidence)`` for the top fashion detection, else ``(None, 0.0)``.

    Best effort: any exception raised by ``yolo_fashion_model`` is logged and
    treated as "nothing detected" so the CLIP analysis can still run.
    """
    try:
        detections = yolo_fashion_model(img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)[0].boxes
        class_ids = detections.cls.cpu().numpy().astype(int)
        confidences = detections.conf.cpu().numpy()
        if len(class_ids) == 0:
            print("No fashion items detected above threshold by yolo_fashion_model.")
            return None, 0.0

        best_idx = np.argmax(confidences)
        class_id = class_ids[best_idx]
        confidence = confidences[best_idx]
        if class_id not in FASHION_CLASSES:
            print(f"Warning: Detected fashion class ID {class_id} not in FASHION_CLASSES map.")
            return None, 0.0

        name = FASHION_CLASSES[class_id]
        print(f"Fashion model detected: '{name}' "
              f"with confidence {confidence:.2f}")
        return name, confidence
    except Exception as e:
        print(f"Error during YOLO fashion model analysis: {e}")
        return None, 0.0


def _score_style(all_probs):
    """Return ``(category_key, score, score_label)`` from the prompt probabilities.

    The leading entries of *all_probs* are read as three consecutive groups
    sized by ``style_prompts['drippy']``, ``style_prompts['mid']`` and the
    remainder up to ``style_prompts_end_index``; each group is averaged.
    """
    drip_end = len(style_prompts['drippy'])
    mid_end = drip_end + len(style_prompts['mid'])
    drip_score = np.mean(all_probs[:drip_end])
    mid_score = np.mean(all_probs[drip_end:mid_end])
    not_score = np.mean(all_probs[mid_end:style_prompts_end_index])

    # "drippy" must clear an absolute threshold as well as beat both others.
    if drip_score > 0.41 and drip_score > mid_score and drip_score > not_score:
        return 'drippy', drip_score, "Drip Score"
    if mid_score > not_score:
        return 'mid', mid_score, "Mid Score"
    return 'not_drippy', not_score, "Trash Score"


def _choose_item_name(fashion_name, fashion_conf, clip_item, clip_prob):
    """Pick the clothing word to mention: fashion model first, then CLIP, then generic."""
    if fashion_name and fashion_conf >= YOLO_FASHION_HIGH_CONF_THRESHOLD:
        print(f"Using highly confident fashion model item: '{fashion_name}'")
        return fashion_name
    if fashion_name and fashion_conf >= YOLO_FASHION_CONF_THRESHOLD:
        # Medium confidence is still preferred over the CLIP guess.
        print(f"Using medium confidence fashion model item: '{fashion_name}'")
        return fashion_name
    if clip_item and clip_prob > 0.05:
        print(f"Using CLIP detected item: '{clip_item}'")
        return clip_item
    generic = random.choice(["fit", "look", "style", "vibe"])
    print(f"Using generic fallback item: '{generic}'")
    return generic


def analyze_outfit(input_img):
    """Rate the outfit in an image and build the HTML, audio and text response.

    Steps: load the image as RGB, crop to the most confident detected person,
    look for a fashion item with the fashion YOLO model, score the style with
    CLIP, pick an item name to mention, then render a response template and
    synthesise it to speech with gTTS.

    Relies on names that are not defined in this block (``Image``, ``np``,
    ``torch``, ``clip``, the YOLO/CLIP models, prompt tables, thresholds,
    ``response_templates``, ``gTTS`` ...); they must exist at module level.

    Args:
        input_img: A file path (``str``), an image object exposing
            ``convert``, or ``None``.

    Returns:
        A 3-tuple ``(html, audio_path, message)``. ``audio_path`` is ``None``
        whenever loading, CLIP analysis or speech generation failed; in those
        cases ``html``/``message`` describe the error instead of the rating.
    """
    import html  # local import: only used to escape exception text below

    if input_img is None:
        return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
                None, "Error: No image provided.")

    if isinstance(input_img, str):
        try:
            # Image.open is lazy, so decoding actually happens in convert().
            # Keeping both inside the try/with reports corrupt files as a load
            # error and guarantees the file handle is closed.
            with Image.open(input_img) as opened:
                img = opened.convert("RGB")
        except Exception as e:
            # The message may echo the (user-supplied) path, so escape it.
            return (f"<p style='color: #FF5555;'>Error loading image: {html.escape(str(e))}</p>",
                    None, "Image loading error")
    else:
        img = input_img.convert("RGB").copy()

    # 1) Person detection: analyse the crop when possible, else the full image.
    cropped_img = _crop_to_best_person(img)

    # 2) Fashion detection always runs on the (possibly uncropped) image.
    fashion_name, fashion_conf = _detect_fashion_item(cropped_img)

    # 3) CLIP analysis: style category plus most likely clothing item.
    try:
        image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
        text_tokens = clip.tokenize(all_prompts).to(DEVICE)
        with torch.no_grad():
            logits, _ = clip_model(image_tensor, text_tokens)
            all_probs = logits.softmax(dim=-1).cpu().numpy()[0]

        category_key, final_score, score_label = _score_style(all_probs)
        category_label = CATEGORY_LABEL_MAP[category_key]
        percentage_score_str = f"{max(0, final_score * 100):.0f}%"
        print(f"Style analysis: Category={category_label}, Score = {score_label}={percentage_score_str} (Raw Score: {final_score:.4f})")

        top_3_clip_items = get_top_clip_clothing(all_probs, n=3)
        if top_3_clip_items:
            detected_items_str = ", ".join([f"{item[0]} ({item[1]*100:.1f}%)" for item in top_3_clip_items])
            print(f"I think I detected: {detected_items_str}")
            # Only the single most probable item feeds the response text.
            clip_item, clip_prob = top_3_clip_items[0]
        else:
            print("I couldn't confidently identify specific clothing items via CLIP.")
            clip_item, clip_prob = "piece", 0.0
    except Exception as e:
        print(f"Error during CLIP analysis: {e}")
        return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
                None, f"Analysis Error: {e}")

    # 4) Decide which item name the spoken response should mention.
    final_clothing_item = _choose_item_name(fashion_name, fashion_conf, clip_item, clip_prob)

    # 5) Generate the response text and its TTS audio.
    tts_path = None
    try:
        chosen_template = random.choice(response_templates[category_key])
        # Only templates containing the placeholder are formatted.
        response_text = chosen_template.format(item=final_clothing_item) if '{item}' in chosen_template else chosen_template
        tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
        tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
        tts.save(tts_path)
        print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
        category_html = f"""
            <div class='results-container'>
                <h2 class='result-category'>RATING: {category_label.upper()}</h2>
                <p class='result-score'>{score_label}: {percentage_score_str}</p>
            </div>
        """
        return category_html, tts_path, response_text
    except Exception as e:
        print(f"Error during response/TTS generation: {e}")
        # Do not leave a partially written audio file behind in the temp dir.
        if tts_path and os.path.exists(tts_path):
            try:
                os.remove(tts_path)
            except OSError as cleanup_err:
                print(f"Could not remove partial TTS file {tts_path}: {cleanup_err}")
        category_html = f"""
            <div class='results-container'>
                <h2 class='result-category'>Result: {category_label.upper()}</h2>
                <p class='result-score'>{score_label}: {percentage_score_str}</p>
                <p class='result-error' style='color: #FFAAAA; font-size: 0.9em;'>Error generating audio/full response.</p>
            </div>
        """
        # Still provide category info, but indicate the TTS/response error.
        return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."

src/drip_backend.py DELETED Viewed

File without changes