dzmu committed on
Commit
fd05be2
·
verified ·
1 Parent(s): bf3b76a

Rename src/drip_backend.py to src/backend.py

Browse files
Files changed (2) hide show
  1. src/backend.py +206 -0
  2. src/drip_backend.py +0 -0
src/backend.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def _crop_to_person(img):
    """Crop *img* to the highest-confidence YOLO person detection.

    Args:
        img: PIL RGB image.

    Returns:
        A PIL image — the person crop when a valid person box is found,
        otherwise the original full image.
    """
    person_results = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)
    boxes = person_results[0].boxes.xyxy.cpu().numpy()
    classes = person_results[0].boxes.cls.cpu().numpy()
    confidences = person_results[0].boxes.conf.cpu().numpy()

    # Class 0 is "person" in standard YOLOv8 COCO weights.
    person_indices = np.where(classes == 0)[0]
    if len(person_indices) == 0:
        print("No person detected by yolo_person_model. Analyzing full image.")
        return img

    # Keep only the most confident person box, clipped to image bounds.
    best_idx = person_indices[np.argmax(confidences[person_indices])]
    x1, y1, x2, y2 = map(int, boxes[best_idx])
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.width, x2), min(img.height, y2)

    if x1 < x2 and y1 < y2:  # box still has positive area after clipping
        print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
        return img.crop((x1, y1, x2, y2))

    print("Warning: Invalid person bounding box after clipping. Using full image.")
    return img


def _detect_fashion_item(img):
    """Run the YOLO fashion model on *img*.

    Args:
        img: PIL image (usually the person crop).

    Returns:
        (item_name, confidence) for the best detection mapped through
        FASHION_CLASSES, or (None, 0.0) when nothing usable was found or
        the model raised.
    """
    try:
        fashion_results = yolo_fashion_model(img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)
        fashion_classes = fashion_results[0].boxes.cls.cpu().numpy().astype(int)
        fashion_confidences = fashion_results[0].boxes.conf.cpu().numpy()

        if len(fashion_classes) == 0:
            print("No fashion items detected above threshold by yolo_fashion_model.")
            return None, 0.0

        # Single best detection by confidence.
        best_idx = np.argmax(fashion_confidences)
        detected_class_id = fashion_classes[best_idx]
        detected_conf = fashion_confidences[best_idx]

        if detected_class_id not in FASHION_CLASSES:
            print(f"Warning: Detected fashion class ID {detected_class_id} not in FASHION_CLASSES map.")
            return None, 0.0

        item_name = FASHION_CLASSES[detected_class_id]
        print(f"Fashion model detected: '{item_name}' "
              f"with confidence {detected_conf:.2f}")
        return item_name, detected_conf

    except Exception as e:
        # Best-effort: the pipeline continues on CLIP alone.
        print(f"Error during YOLO fashion model analysis: {e}")
        return None, 0.0


def analyze_outfit(input_img):
    """Analyze an outfit photo and produce a rating plus spoken response.

    Pipeline: (1) crop to the detected person, (2) identify the clothing
    item with a YOLO fashion model, (3) score the style with CLIP prompts,
    (4) pick the item name to mention, (5) build the response and TTS audio.

    Args:
        input_img: a file path (str) or a PIL Image; may be None.

    Returns:
        (result_html, tts_audio_path_or_None, response_text) — on any error
        the HTML carries the error message and the audio path may be None.
    """
    # Accept both file paths and already-loaded PIL Images.
    if isinstance(input_img, str):
        try:
            input_img = Image.open(input_img)
        except Exception as e:
            return (f"<p style='color: #FF5555;'>Error loading image: {str(e)}</p>",
                    None, "Image loading error")

    if input_img is None:
        return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
                None, "Error: No image provided.")

    img = input_img.convert("RGB").copy()

    # 1) YOLO person detection — analyze the person crop when available.
    cropped_img = _crop_to_person(img)

    # 2) YOLO fashion model detection. (The original guard was
    # `if person_detected or True:` — i.e. it always ran; keep that.)
    detected_fashion_item_name, detected_fashion_item_conf = _detect_fashion_item(cropped_img)

    # 3) CLIP style analysis on the (cropped or full) image.
    clip_detected_item = "look"   # fallback item name if CLIP yields nothing
    clip_detected_item_prob = 0.0
    category_key = 'mid'          # default style category

    try:
        image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
        text_tokens = clip.tokenize(all_prompts).to(DEVICE)

        with torch.no_grad():
            logits, _ = clip_model(image_tensor, text_tokens)
            all_probs = logits.softmax(dim=-1).cpu().numpy()[0]

        # Mean probability per style bucket; prompts are laid out
        # [drippy..., mid..., not_drippy...] in all_prompts.
        drip_len = len(style_prompts['drippy'])
        mid_len = len(style_prompts['mid'])
        drip_score = np.mean(all_probs[0:drip_len])
        mid_score = np.mean(all_probs[drip_len:drip_len + mid_len])
        not_score = np.mean(all_probs[drip_len + mid_len:style_prompts_end_index])

        # Pick the winning category; "drippy" must also clear a 0.41 floor.
        if drip_score > 0.41 and drip_score > mid_score and drip_score > not_score:
            category_key, final_score, score_label = 'drippy', drip_score, "Drip Score"
        elif mid_score > not_score:
            category_key, final_score, score_label = 'mid', mid_score, "Mid Score"
        else:
            category_key, final_score, score_label = 'not_drippy', not_score, "Trash Score"

        category_label = CATEGORY_LABEL_MAP[category_key]
        percentage_score = max(0, final_score * 100)
        percentage_score_str = f"{percentage_score:.0f}%"  # e.g. "3%", "15%"

        print(f"Style analysis: Category={category_label}, Score = {score_label}={percentage_score_str} (Raw Score: {final_score:.4f})")

        # Top clothing candidates from CLIP; only the best one feeds the
        # response logic, the rest are logged for debugging.
        top_3_clip_items = get_top_clip_clothing(all_probs, n=3)
        if top_3_clip_items:
            detected_items_str = ", ".join(f"{item} ({prob * 100:.1f}%)"
                                           for item, prob in top_3_clip_items)
            print(f"I think I detected: {detected_items_str}")
            clip_detected_item, clip_detected_item_prob = top_3_clip_items[0]
        else:
            print("I couldn't confidently identify specific clothing items via CLIP.")
            clip_detected_item = "piece"
            clip_detected_item_prob = 0.0

    except Exception as e:
        print(f"Error during CLIP analysis: {e}")
        return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
                None, f"Analysis Error: {e}")

    # 4) Choose the item to mention: fashion model (high then medium
    # confidence) beats CLIP, which beats a random generic term.
    if detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_HIGH_CONF_THRESHOLD:
        final_clothing_item = detected_fashion_item_name
        print(f"Using highly confident fashion model item: '{final_clothing_item}'")
    elif detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_CONF_THRESHOLD:
        final_clothing_item = detected_fashion_item_name
        print(f"Using medium confidence fashion model item: '{final_clothing_item}'")
    elif clip_detected_item and clip_detected_item_prob > 0.05:
        final_clothing_item = clip_detected_item
        print(f"Using CLIP detected item: '{final_clothing_item}'")
    else:
        final_clothing_item = random.choice(["fit", "look", "style", "vibe"])
        print(f"Using generic fallback item: '{final_clothing_item}'")

    # 5) Build the spoken response and render the result HTML.
    try:
        response_pool = response_templates[category_key]
        chosen_template = random.choice(response_pool)
        # Substitute the item name only when the template asks for it.
        response_text = (chosen_template.format(item=final_clothing_item)
                         if '{item}' in chosen_template else chosen_template)

        tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
        tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
        tts.save(tts_path)
        print(f"Generated TTS response: '{response_text}' saved to {tts_path}")

        category_html = f"""
        <div class='results-container'>
            <h2 class='result-category'>RATING: {category_label.upper()}</h2>
            <p class='result-score'>{score_label}: {percentage_score_str}</p>
        </div>
        """
        return category_html, tts_path, response_text

    except Exception as e:
        # TTS/template failure: still report the category, flag the error.
        print(f"Error during response/TTS generation: {e}")
        category_html = f"""
        <div class='results-container'>
            <h2 class='result-category'>Result: {category_label.upper()}</h2>
            <p class='result-score'>{score_label}: {percentage_score_str}</p>
            <p class='result-error' style='color: #FFAAAA; font-size: 0.9em;'>Error generating audio/full response.</p>
        </div>
        """
        return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
src/drip_backend.py DELETED
File without changes