dzmu commited on
Commit
d0d6795
·
verified ·
1 Parent(s): 16ed44a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -207
app.py CHANGED
@@ -5,7 +5,7 @@ import numpy as np
5
  import random
6
  import os
7
  from PIL import Image
8
- from ultralytics import YOLO # Still needed for person detection
9
  from gtts import gTTS
10
  import uuid
11
  import time
@@ -14,9 +14,23 @@ import tempfile
14
  # --- Configuration ---
15
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
  YOLO_PERSON_MODEL_PATH = 'yolov8n.pt' # Standard YOLOv8 for person detection
17
- # YOLO_FASHION_MODEL_PATH = 'best.pt' # REMOVED - Not using fashion model anymore
18
  CLIP_MODEL_NAME = "ViT-B/32"
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # --- Load Models ---
21
  print(f"Using device: {DEVICE}")
22
  try:
@@ -24,15 +38,21 @@ try:
24
  print(f"CLIP model ({CLIP_MODEL_NAME}) loaded successfully.")
25
  except Exception as e:
26
  print(f"Error loading CLIP model: {e}")
27
- # Handle error
 
28
  try:
29
- yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH).to(DEVICE)
30
  print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
31
  except Exception as e:
32
  print(f"Error loading YOLO person model: {e}")
33
- # Handle error
34
 
35
- # REMOVED Fashion Model Loading
 
 
 
 
 
36
 
37
  # --- Prompts and Responses ---
38
  style_prompts = {
@@ -50,7 +70,7 @@ style_prompts = {
50
  ]
51
  }
52
 
53
- # --- REINSTATED: Clothing prompts for CLIP ---
54
  clothing_prompts = [
55
  "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
56
  "dress", "skirt", "pants", "jeans", "trousers", "shorts",
@@ -58,16 +78,15 @@ clothing_prompts = [
58
  "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
59
  ]
60
 
61
- # --- REINSTATED: Combine all prompts for CLIP ---
62
  all_prompts = []
63
  for cat_prompts in style_prompts.values():
64
  all_prompts.extend(cat_prompts)
65
-
66
- # Record end of style prompts before adding clothing prompts
67
- style_prompts_end_index = len(all_prompts)
68
  all_prompts.extend(clothing_prompts)
69
  print(f"Total prompts for CLIP: {len(all_prompts)}")
70
 
 
71
  response_templates = {
72
  'drippy': [
73
  "You're Drippy, bruh – fire {item}!", "{item} goes crazy, on god!", "Certified drippy with that {item}."
@@ -79,21 +98,33 @@ response_templates = {
79
  'not_drippy': [
80
  "Bro thought that {item} was tuff!", "Oh hell nah! Burn that {item}!",
81
  "Crimes against fashion, especially that {item}! Also… maybe get a haircut.",
82
- "Never walk out the house again with that {item}."
 
 
83
  ]
84
  }
85
  CATEGORY_LABEL_MAP = { "drippy": "drippy", "mid": "mid", "not_drippy": "trash" }
86
 
87
- # --- REINSTATED: Function to get top clothing items based on CLIP probabilities ---
88
- def get_top_clothing(probs, n=3):
89
  """Gets the top N clothing items based on CLIP probabilities."""
90
  clothing_probs_start_index = style_prompts_end_index
91
  clothing_probs = probs[clothing_probs_start_index:]
92
  actual_n = min(n, len(clothing_prompts))
93
  if actual_n <= 0:
94
- return ["item"]
 
 
95
  top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
96
- return [clothing_prompts[i] for i in reversed(top_indices_in_slice)]
 
 
 
 
 
 
 
 
97
 
98
  # --- Core Logic ---
99
  def analyze_outfit(input_img: Image.Image):
@@ -101,30 +132,72 @@ def analyze_outfit(input_img: Image.Image):
101
  return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
102
  None, "Error: No image provided.")
103
 
104
- img = input_img.copy()
 
105
  # 1) YOLO Person Detection
106
- person_results = yolo_person_model(img, verbose=False)
107
  boxes = person_results[0].boxes.xyxy.cpu().numpy()
108
  classes = person_results[0].boxes.cls.cpu().numpy()
109
  confidences = person_results[0].boxes.conf.cpu().numpy()
 
 
110
  person_indices = np.where(classes == 0)[0]
111
- cropped_img = img
 
 
112
  if len(person_indices) > 0:
 
113
  max_conf_person_idx = person_indices[np.argmax(confidences[person_indices])]
114
  x1, y1, x2, y2 = map(int, boxes[max_conf_person_idx])
 
115
  x1, y1 = max(0, x1), max(0, y1)
116
  x2, y2 = min(img.width, x2), min(img.height, y2)
117
- if x1 < x2 and y1 < y2:
118
- cropped_img = img.crop((x1, y1, x2, y2))
119
- print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
 
 
120
  else:
121
  print("Warning: Invalid person bounding box after clipping. Using full image.")
122
  cropped_img = img
123
  else:
124
  print("No person detected by yolo_person_model. Analyzing full image.")
125
 
126
- # 2) CLIP Analysis
127
- detected_clothing_item = "look"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  try:
129
  image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
130
  text_tokens = clip.tokenize(all_prompts).to(DEVICE)
@@ -133,12 +206,14 @@ def analyze_outfit(input_img: Image.Image):
133
  logits, _ = clip_model(image_tensor, text_tokens)
134
  all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
135
 
 
136
  drip_len = len(style_prompts['drippy'])
137
  mid_len = len(style_prompts['mid'])
138
  drip_score = np.mean(all_probs[0 : drip_len])
139
  mid_score = np.mean(all_probs[drip_len : drip_len + mid_len])
140
  not_score = np.mean(all_probs[drip_len + mid_len : style_prompts_end_index])
141
 
 
142
  if drip_score > mid_score and drip_score > not_score:
143
  category_key = 'drippy'
144
  final_score = drip_score
@@ -153,29 +228,80 @@ def analyze_outfit(input_img: Image.Image):
153
  final_score_str = f"{final_score:.2f}"
154
  print(f"Style analysis: Category={category_label}, Score={final_score_str}")
155
 
156
- clothing_items_detected_by_clip = get_top_clothing(all_probs, n=1)
157
- if clothing_items_detected_by_clip:
158
- detected_clothing_item = clothing_items_detected_by_clip[0]
159
- print(f"Top clothing item identified by CLIP: {detected_clothing_item}")
 
 
160
  else:
161
  print("Warning: CLIP did not identify a top clothing item.")
162
- detected_clothing_item = "fit"
163
 
164
  except Exception as e:
165
- print(f"Error during CLIP analysis or clothing selection: {e}")
166
- return ("<p style='color: #FF5555;'>Error during analysis.</p>",
 
167
  None, f"Analysis Error: {e}")
168
 
169
- # 3) Generate Response and TTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  try:
171
- response_text = random.choice(response_templates[category_key]).format(item=detected_clothing_item)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
173
  tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
174
  tts.save(tts_path)
175
  print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
176
 
177
  # --- Updated HTML Output ---
178
- # Simpler structure, relies more on CSS for styling defined below
179
  category_html = f"""
180
  <div class='results-container'>
181
  <h2 class='result-category'>RATING: {category_label.upper()}</h2>
@@ -192,205 +318,57 @@ def analyze_outfit(input_img: Image.Image):
192
  <p class='result-score' style='color: #FFAAAA;'>Error generating audio/full response.</p>
193
  </div>
194
  """
 
195
  return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
196
 
 
 
197
 
198
- # --- Elite Fashion / Techno CSS ---
199
- custom_css = """
200
- :root {
201
- --primary-bg-color: #000000;
202
- --secondary-bg-color: #1A1A1A;
203
- --text-color: #FFFFFF;
204
- --accent-color: #1F04FF;
205
- --border-color: #333333; /* Slightly lighter than secondary bg for subtle definition */
206
- --input-bg-color: #1A1A1A;
207
- --button-text-color: #FFFFFF;
208
- --body-text-size: 16px; /* Base text size */
209
- }
210
-
211
- /* --- Global Styles --- */
212
- body, .gradio-container {
213
- background-color: var(--primary-bg-color) !important;
214
- color: var(--text-color) !important;
215
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; /* Modern font stack */
216
- font-size: var(--body-text-size);
217
- }
218
-
219
- /* Hide default Gradio footer */
220
- footer { display: none !important; }
221
-
222
- /* --- Component Styling --- */
223
- .gr-block { /* General block container */
224
- background-color: var(--secondary-bg-color) !important;
225
- border: 1px solid var(--border-color) !important;
226
- border-radius: 8px !important; /* Slightly rounded corners */
227
- padding: 15px !important;
228
- box-shadow: none !important; /* Remove default shadows */
229
- }
230
-
231
- /* Input/Output Text Areas & General inputs */
232
- .gr-input, .gr-output, .gr-textbox textarea, .gr-dropdown select, .gr-checkboxgroup input {
233
- background-color: var(--input-bg-color) !important;
234
- color: var(--text-color) !important;
235
- border: 1px solid var(--border-color) !important;
236
- border-radius: 5px !important;
237
- }
238
- .gr-textbox textarea::placeholder { /* Style placeholder text if needed */
239
- color: #888888 !important;
240
- }
241
-
242
- /* Component Labels */
243
- .gr-label span, .gr-label .label-text {
244
- color: var(--text-color) !important;
245
- font-weight: 500 !important; /* Slightly bolder labels */
246
- font-size: 0.95em !important;
247
- margin-bottom: 8px !important; /* Space below label */
248
- }
249
-
250
- /* Image Input/Output */
251
- .gr-image {
252
- background-color: var(--primary-bg-color) !important; /* Match main background */
253
- border: 1px dashed var(--border-color) !important; /* Dashed border for drop zone */
254
- border-radius: 8px !important;
255
- overflow: hidden; /* Ensure image stays within bounds */
256
- }
257
- .gr-image img {
258
- border-radius: 6px !important; /* Slightly round image corners */
259
- object-fit: contain; /* Ensure image fits well */
260
- }
261
- .gr-image .no-image, .gr-image .upload-button { /* Placeholder text/button inside image component */
262
- color: #AAAAAA !important;
263
- }
264
-
265
- /* Audio Component */
266
- .gr-audio > div:first-of-type { /* Target the container around the audio player */
267
- border: 1px solid var(--border-color) !important;
268
- background-color: var(--secondary-bg-color) !important;
269
- border-radius: 5px !important;
270
- padding: 10px !important;
271
- }
272
- .gr-audio audio { /* Style the audio player itself */
273
- width: 100%; /* Make player responsive */
274
- filter: invert(1) hue-rotate(180deg); /* Basic dark theme for player controls */
275
- }
276
-
277
- /* --- Button Styling --- */
278
- .gr-button { /* General button style reset */
279
- border: none !important;
280
- border-radius: 5px !important;
281
- transition: background-color 0.2s ease, transform 0.1s ease;
282
- font-weight: 600 !important;
283
- }
284
- .gr-button-primary { /* Specific styling for the primary Analyze button */
285
- background-color: var(--accent-color) !important;
286
- color: var(--button-text-color) !important;
287
- font-size: 1.1em !important; /* Make primary button slightly larger */
288
- padding: 12px 20px !important; /* Adjust padding */
289
- }
290
- .gr-button-primary:hover {
291
- background-color: #482FFF !important; /* Slightly lighter blue on hover */
292
- transform: scale(1.02); /* Subtle scale effect */
293
- box-shadow: 0 0 10px var(--accent-color); /* Add a glow effect */
294
- }
295
- .gr-button-primary:active {
296
- transform: scale(0.98); /* Press down effect */
297
- }
298
-
299
- /* --- Typography & Content --- */
300
- h1, h2, h3 {
301
- color: var(--text-color) !important;
302
- font-weight: 600; /* Bold headings */
303
- letter-spacing: 0.5px; /* Add slight letter spacing */
304
- }
305
- .prose h1 { /* Target Markdown H1 specifically if needed */
306
- text-align: center;
307
- margin-bottom: 25px !important;
308
- font-size: 2em !important; /* Larger title */
309
- text-transform: uppercase; /* Uppercase for impact */
310
- letter-spacing: 1.5px;
311
- }
312
- .prose p { /* Target Markdown Paragraph */
313
- color: #CCCCCC !important; /* Slightly dimmer text for descriptions */
314
- font-size: 0.95em;
315
- text-align: center;
316
- }
317
-
318
- /* Custom styling for the results HTML block */
319
- .results-container {
320
- text-align: center;
321
- padding: 20px;
322
- border: 1px solid var(--accent-color); /* Use accent color for border */
323
- border-radius: 8px;
324
- background: linear-gradient(145deg, var(--secondary-bg-color), #2a2a2a); /* Subtle gradient */
325
- }
326
- .result-category {
327
- color: var(--accent-color) !important; /* Use accent color for category */
328
- font-size: 1.5em;
329
- margin-bottom: 5px;
330
- font-weight: 700;
331
- text-transform: uppercase;
332
- }
333
- .result-score {
334
- color: var(--text-color) !important;
335
- font-size: 1.1em;
336
- margin-top: 0;
337
- }
338
-
339
- /* --- Layout Adjustments --- */
340
- .gradio-container {
341
- max-width: 850px !important; /* Slightly wider max-width */
342
- margin: auto !important;
343
- padding-top: 30px; /* Add some space at the top */
344
- }
345
- .gr-row {
346
- gap: 25px !important; /* Increase gap between columns */
347
- }
348
- """
349
-
350
-
351
- # --- Gradio Interface (Now using the custom CSS) ---
352
- with gr.Blocks(css=custom_css, theme=gr.themes.Base(primary_hue="neutral", secondary_hue="neutral", text_size=gr.themes.sizes.text_lg)) as demo: # Use Base theme to minimize default styles
353
- # Title using Markdown (styled by CSS)
354
  gr.Markdown("<h1>💧 DripAI: Rate Your Fit 💧</h1>")
355
-
356
  with gr.Row():
357
- with gr.Column(scale=1, min_width=350): # Assign min width for better responsiveness
358
  input_image = gr.Image(
359
  type='pil',
360
- label="Upload Your Outfit", # Simpler label
361
  sources=['upload', 'webcam', 'clipboard'],
362
- height=450 # Slightly taller image area
363
  )
364
  analyze_button = gr.Button(
365
  "Analyze Outfit",
366
  variant="primary",
367
- # size="lg" removed, controlled by CSS
368
  )
369
-
370
- with gr.Column(scale=1, min_width=350): # Assign min width
371
- gr.Markdown("### ANALYSIS RESULTS") # Simple heading
372
- category_html = gr.HTML(label="Rating & Score") # Label for screen readers/context
373
  response_box = gr.Textbox(
374
  lines=3,
375
- label="Verbal Feedback", # Updated label
376
  interactive=False
377
  )
378
  audio_output = gr.Audio(
379
- autoplay=True, # Changed default to false, user can click play
380
  label="Audio Feedback",
381
- streaming=False
382
  )
383
 
384
- # Bind the analysis function to the button click
385
  analyze_button.click(
386
  fn=analyze_outfit,
387
  inputs=[input_image],
388
  outputs=[category_html, audio_output, response_box]
389
  )
390
-
391
- # Footer description text
392
- gr.Markdown("<p>Upload, paste, or use your webcam to capture your outfit. DripAI evaluates your style.</p>")
393
 
394
  # --- Launch App ---
395
  if __name__ == "__main__":
396
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
5
  import random
6
  import os
7
  from PIL import Image
8
+ from ultralytics import YOLO # Needed for both person and fashion detection
9
  from gtts import gTTS
10
  import uuid
11
  import time
 
14
  # --- Configuration ---
15
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16
  YOLO_PERSON_MODEL_PATH = 'yolov8n.pt' # Standard YOLOv8 for person detection
17
+ YOLO_FASHION_MODEL_PATH = 'best.pt' # <<< Your custom fashion model path
18
  CLIP_MODEL_NAME = "ViT-B/32"
19
 
20
+ # Confidence Thresholds
21
+ YOLO_PERSON_CONF_THRESHOLD = 0.4 # Min confidence for detecting a person
22
+ YOLO_FASHION_CONF_THRESHOLD = 0.4 # Min confidence for detecting a fashion item
23
+ YOLO_FASHION_HIGH_CONF_THRESHOLD = 0.6 # Higher threshold to prioritize fashion model item
24
+
25
+ # --- Define Fashion Model Classes (IMPORTANT: Match these to your 'best.pt' training) ---
26
+ FASHION_CLASSES = {
27
+ 0: 'long sleeve top', 1: 'skirt', 2: 'trousers', 3: 'short sleeve top',
28
+ 4: 'long sleeve outwear', 5: 'short sleeve dress', 6: 'shorts',
29
+ 7: 'vest dress', 8: 'sling dress', 9: 'vest', 10: 'long sleeve dress',
30
+ 11: 'sling', 12: 'short sleeve outwear'
31
+ }
32
+ print(f"Defined {len(FASHION_CLASSES)} fashion categories for {YOLO_FASHION_MODEL_PATH}")
33
+
34
  # --- Load Models ---
35
  print(f"Using device: {DEVICE}")
36
  try:
 
38
  print(f"CLIP model ({CLIP_MODEL_NAME}) loaded successfully.")
39
  except Exception as e:
40
  print(f"Error loading CLIP model: {e}")
41
+ # Handle error or exit if critical
42
+
43
  try:
44
+ yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH) # No .to(DEVICE) needed here for Ultralytics YOLO v8
45
  print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
46
  except Exception as e:
47
  print(f"Error loading YOLO person model: {e}")
48
+ # Handle error or exit if critical
49
 
50
+ try:
51
+ yolo_fashion_model = YOLO(YOLO_FASHION_MODEL_PATH) # No .to(DEVICE) needed here
52
+ print(f"YOLO fashion detection model ({YOLO_FASHION_MODEL_PATH}) loaded successfully.")
53
+ except Exception as e:
54
+ print(f"Error loading YOLO fashion model: {e}")
55
+ # Handle error or exit if critical - The app might still work with CLIP only
56
 
57
  # --- Prompts and Responses ---
58
  style_prompts = {
 
70
  ]
71
  }
72
 
73
+ # Clothing prompts for CLIP (still useful as fallback and general context)
74
  clothing_prompts = [
75
  "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
76
  "dress", "skirt", "pants", "jeans", "trousers", "shorts",
 
78
  "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
79
  ]
80
 
81
+ # Combine all prompts for CLIP
82
  all_prompts = []
83
  for cat_prompts in style_prompts.values():
84
  all_prompts.extend(cat_prompts)
85
+ style_prompts_end_index = len(all_prompts) # Mark where style prompts end
 
 
86
  all_prompts.extend(clothing_prompts)
87
  print(f"Total prompts for CLIP: {len(all_prompts)}")
88
 
89
+ # Response Templates (Added a more generic 'trash' option)
90
  response_templates = {
91
  'drippy': [
92
  "You're Drippy, bruh – fire {item}!", "{item} goes crazy, on god!", "Certified drippy with that {item}."
 
98
  'not_drippy': [
99
  "Bro thought that {item} was tuff!", "Oh hell nah! Burn that {item}!",
100
  "Crimes against fashion, especially that {item}! Also… maybe get a haircut.",
101
+ "Never walk out the house again with that {item}.",
102
+ "Your drip is trash, try again.", # Generic trash response
103
+ "This ain't it chief. The overall style needs work." # Another generic one
104
  ]
105
  }
106
  CATEGORY_LABEL_MAP = { "drippy": "drippy", "mid": "mid", "not_drippy": "trash" }
107
 
108
+ # --- Helper Functions ---
109
+ def get_top_clip_clothing(probs, n=1):
110
  """Gets the top N clothing items based on CLIP probabilities."""
111
  clothing_probs_start_index = style_prompts_end_index
112
  clothing_probs = probs[clothing_probs_start_index:]
113
  actual_n = min(n, len(clothing_prompts))
114
  if actual_n <= 0:
115
+ return [] # Return empty list if no clothing prompts
116
+
117
+ # Get indices and probabilities of top N items within the clothing slice
118
  top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
119
+ # Convert back to indices in the original all_probs array
120
+ top_global_indices = [idx + clothing_probs_start_index for idx in top_indices_in_slice]
121
+
122
+ # Return list of tuples: (item_name, probability)
123
+ top_items_with_probs = [
124
+ (clothing_prompts[i], clothing_probs[i])
125
+ for i in reversed(top_indices_in_slice) # Get highest prob first
126
+ ]
127
+ return top_items_with_probs
128
 
129
  # --- Core Logic ---
130
  def analyze_outfit(input_img: Image.Image):
 
132
  return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
133
  None, "Error: No image provided.")
134
 
135
+ img = input_img.convert("RGB").copy() # Ensure image is in RGB
136
+
137
  # 1) YOLO Person Detection
138
+ person_results = yolo_person_model(img, verbose=False, conf=YOLO_PERSON_CONF_THRESHOLD)
139
  boxes = person_results[0].boxes.xyxy.cpu().numpy()
140
  classes = person_results[0].boxes.cls.cpu().numpy()
141
  confidences = person_results[0].boxes.conf.cpu().numpy()
142
+
143
+ # Filter for persons (class 0 in standard YOLOv8)
144
  person_indices = np.where(classes == 0)[0]
145
+ cropped_img = img # Default to full image if no person found
146
+ person_detected = False
147
+
148
  if len(person_indices) > 0:
149
+ # Find the person detection with the highest confidence
150
  max_conf_person_idx = person_indices[np.argmax(confidences[person_indices])]
151
  x1, y1, x2, y2 = map(int, boxes[max_conf_person_idx])
152
+ # Ensure coordinates are valid and within image bounds
153
  x1, y1 = max(0, x1), max(0, y1)
154
  x2, y2 = min(img.width, x2), min(img.height, y2)
155
+
156
+ if x1 < x2 and y1 < y2: # Check if the box has valid dimensions
157
+ cropped_img = img.crop((x1, y1, x2, y2))
158
+ print(f"Person detected and cropped: Box {x1, y1, x2, y2}")
159
+ person_detected = True
160
  else:
161
  print("Warning: Invalid person bounding box after clipping. Using full image.")
162
  cropped_img = img
163
  else:
164
  print("No person detected by yolo_person_model. Analyzing full image.")
165
 
166
+ # 2) YOLO Fashion Model Detection (run on the cropped image if person was found)
167
+ detected_fashion_item_name = None
168
+ detected_fashion_item_conf = 0.0
169
+ if person_detected or True: # Or always run on the (potentially full) image? Let's always run for now.
170
+ try:
171
+ fashion_results = yolo_fashion_model(cropped_img, verbose=False, conf=YOLO_FASHION_CONF_THRESHOLD)
172
+ fashion_boxes = fashion_results[0].boxes.xyxy.cpu().numpy()
173
+ fashion_classes = fashion_results[0].boxes.cls.cpu().numpy().astype(int)
174
+ fashion_confidences = fashion_results[0].boxes.conf.cpu().numpy()
175
+
176
+ if len(fashion_classes) > 0:
177
+ # Find the detection with the highest confidence
178
+ best_fashion_idx = np.argmax(fashion_confidences)
179
+ detected_class_id = fashion_classes[best_fashion_idx]
180
+ detected_fashion_item_conf = fashion_confidences[best_fashion_idx]
181
+
182
+ if detected_class_id in FASHION_CLASSES:
183
+ detected_fashion_item_name = FASHION_CLASSES[detected_class_id]
184
+ print(f"Fashion model detected: '{detected_fashion_item_name}' "
185
+ f"with confidence {detected_fashion_item_conf:.2f}")
186
+ else:
187
+ print(f"Warning: Detected fashion class ID {detected_class_id} not in FASHION_CLASSES map.")
188
+ else:
189
+ print("No fashion items detected above threshold by yolo_fashion_model.")
190
+
191
+ except Exception as e:
192
+ print(f"Error during YOLO fashion model analysis: {e}")
193
+ # Continue without fashion model input
194
+
195
+ # 3) CLIP Analysis (always run on the cropped/full image)
196
+ clip_detected_item = "look" # Default fallback item name
197
+ clip_detected_item_prob = 0.0
198
+ category_key = 'mid' # Default category
199
+ final_score_str = "N/A"
200
+
201
  try:
202
  image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
203
  text_tokens = clip.tokenize(all_prompts).to(DEVICE)
 
206
  logits, _ = clip_model(image_tensor, text_tokens)
207
  all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
208
 
209
+ # Calculate style scores
210
  drip_len = len(style_prompts['drippy'])
211
  mid_len = len(style_prompts['mid'])
212
  drip_score = np.mean(all_probs[0 : drip_len])
213
  mid_score = np.mean(all_probs[drip_len : drip_len + mid_len])
214
  not_score = np.mean(all_probs[drip_len + mid_len : style_prompts_end_index])
215
 
216
+ # Determine overall style category
217
  if drip_score > mid_score and drip_score > not_score:
218
  category_key = 'drippy'
219
  final_score = drip_score
 
228
  final_score_str = f"{final_score:.2f}"
229
  print(f"Style analysis: Category={category_label}, Score={final_score_str}")
230
 
231
+ # Get top clothing item from CLIP
232
+ top_clip_items = get_top_clip_clothing(all_probs, n=1)
233
+ if top_clip_items:
234
+ clip_detected_item, clip_detected_item_prob = top_clip_items[0]
235
+ print(f"Top clothing item identified by CLIP: '{clip_detected_item}' "
236
+ f"with probability {clip_detected_item_prob:.2f}")
237
  else:
238
  print("Warning: CLIP did not identify a top clothing item.")
239
+ clip_detected_item = "fit" # Use a different fallback if CLIP fails
240
 
241
  except Exception as e:
242
+ print(f"Error during CLIP analysis: {e}")
243
+ # Use defaults, maybe return error message?
244
+ return ("<p style='color: #FF5555;'>Error during CLIP analysis.</p>",
245
  None, f"Analysis Error: {e}")
246
 
247
+ # 4) Determine the Final Item to Mention in Response
248
+ final_clothing_item = "style" # Ultimate fallback generic term
249
+ generic_response_needed = False
250
+
251
+ if detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_HIGH_CONF_THRESHOLD:
252
+ # Priority 1: High-confidence fashion model detection
253
+ final_clothing_item = detected_fashion_item_name
254
+ print(f"Using highly confident fashion model item: '{final_clothing_item}'")
255
+ elif detected_fashion_item_name and detected_fashion_item_conf >= YOLO_FASHION_CONF_THRESHOLD:
256
+ # Priority 2: Medium-confidence fashion model detection (still prefer over CLIP)
257
+ final_clothing_item = detected_fashion_item_name
258
+ print(f"Using medium confidence fashion model item: '{final_clothing_item}'")
259
+ elif clip_detected_item and clip_detected_item_prob > 0.05: # Check if CLIP prob is somewhat reasonable
260
+ # Priority 3: CLIP detection (if fashion model didn't provide a strong candidate)
261
+ final_clothing_item = clip_detected_item
262
+ print(f"Using CLIP detected item: '{final_clothing_item}'")
263
+ else:
264
+ # Priority 4: Generic response needed (no confident detection from either model)
265
+ final_clothing_item = random.choice(["fit", "look", "style", "vibe"]) # Randomize generic term
266
+ generic_response_needed = True
267
+ print(f"Using generic fallback item: '{final_clothing_item}'")
268
+
269
+
270
+ # 5) Generate Response and TTS
271
  try:
272
+ response_pool = response_templates[category_key]
273
+
274
+ # If generic response is needed OR category is trash, potentially use more generic templates
275
+ if generic_response_needed or category_key == 'not_drippy':
276
+ # Give higher chance to generic trash responses if category is 'not_drippy'
277
+ if category_key == 'not_drippy':
278
+ # Mix specific item templates with generic ones
279
+ specific_templates = [t for t in response_pool if '{item}' in t]
280
+ generic_templates = [t for t in response_pool if '{item}' not in t]
281
+ # e.g., 70% chance generic, 30% chance specific item mention (even if generic item name)
282
+ if random.random() < 0.7 or generic_response_needed:
283
+ chosen_template = random.choice(generic_templates if generic_templates else response_pool)
284
+ else:
285
+ chosen_template = random.choice(specific_templates if specific_templates else response_pool)
286
+ else: # Mid or Drippy, but generic needed
287
+ chosen_template = random.choice([t for t in response_pool if '{item}' in t] if not generic_response_needed else response_pool)
288
+
289
+ else: # Drippy or Mid, and we have a specific item
290
+ chosen_template = random.choice([t for t in response_pool if '{item}' in t])
291
+
292
+ # Format the response, substituting the determined item name
293
+ # Handle cases where the chosen template might be generic and doesn't have {item}
294
+ if '{item}' in chosen_template:
295
+ response_text = chosen_template.format(item=final_clothing_item)
296
+ else:
297
+ response_text = chosen_template # Use the generic template as is
298
+
299
  tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
300
  tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
301
  tts.save(tts_path)
302
  print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
303
 
304
  # --- Updated HTML Output ---
 
305
  category_html = f"""
306
  <div class='results-container'>
307
  <h2 class='result-category'>RATING: {category_label.upper()}</h2>
 
318
  <p class='result-score' style='color: #FFAAAA;'>Error generating audio/full response.</p>
319
  </div>
320
  """
321
+ # Still provide category info, but indicate TTS/response error
322
  return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
323
 
324
+ # --- Elite Fashion / Techno CSS (Keep your existing CSS) ---
325
+ custom_css = """:root { --primary-bg-color: #000000; --secondary-bg-color: #1A1A1A; --text-color: #FFFFFF; --accent-color: #1F04FF; --border-color: #333333; --input-bg-color: #1A1A1A; --button-text-color: #FFFFFF; --body-text-size: 16px; } body, .gradio-container { background-color: var(--primary-bg-color) !important; color: var(--text-color) !important; font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; font-size: var(--body-text-size); } footer { display: none !important; } .gr-block { background-color: var(--secondary-bg-color) !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; padding: 15px !important; box-shadow: none !important; } .gr-input, .gr-output, .gr-textbox textarea, .gr-dropdown select, .gr-checkboxgroup input { background-color: var(--input-bg-color) !important; color: var(--text-color) !important; border: 1px solid var(--border-color) !important; border-radius: 5px !important; } .gr-textbox textarea::placeholder { color: #888888 !important; } .gr-label span, .gr-label .label-text { color: var(--text-color) !important; font-weight: 500 !important; font-size: 0.95em !important; margin-bottom: 8px !important; } .gr-image { background-color: var(--primary-bg-color) !important; border: 1px dashed var(--border-color) !important; border-radius: 8px !important; overflow: hidden; } .gr-image img { border-radius: 6px !important; object-fit: contain; } .gr-image .no-image, .gr-image .upload-button { color: #AAAAAA !important; } .gr-audio > div:first-of-type { border: 1px solid var(--border-color) !important; background-color: var(--secondary-bg-color) !important; border-radius: 5px !important; padding: 10px !important; } .gr-audio audio { width: 100%; filter: invert(1) hue-rotate(180deg); } .gr-button { border: none !important; border-radius: 5px !important; transition: background-color 0.2s ease, transform 0.1s ease; 
font-weight: 600 !important; } .gr-button-primary { background-color: var(--accent-color) !important; color: var(--button-text-color) !important; font-size: 1.1em !important; padding: 12px 20px !important; } .gr-button-primary:hover { background-color: #482FFF !important; transform: scale(1.02); box-shadow: 0 0 10px var(--accent-color); } .gr-button-primary:active { transform: scale(0.98); } h1, h2, h3 { color: var(--text-color) !important; font-weight: 600; letter-spacing: 0.5px; } .prose h1 { text-align: center; margin-bottom: 25px !important; font-size: 2em !important; text-transform: uppercase; letter-spacing: 1.5px; } .prose p { color: #CCCCCC !important; font-size: 0.95em; text-align: center; } .results-container { text-align: center; padding: 20px; border: 1px solid var(--accent-color); border-radius: 8px; background: linear-gradient(145deg, var(--secondary-bg-color), #2a2a2a); } .result-category { color: var(--accent-color) !important; font-size: 1.5em; margin-bottom: 5px; font-weight: 700; text-transform: uppercase; } .result-score { color: var(--text-color) !important; font-size: 1.1em; margin-top: 0; } .gradio-container { max-width: 850px !important; margin: auto !important; padding-top: 30px; } .gr-row { gap: 25px !important; }"""
326
 
327
+ # --- Gradio Interface (Using the custom CSS) ---
328
+ with gr.Blocks(css=custom_css, theme=gr.themes.Base(primary_hue="neutral", secondary_hue="neutral", text_size=gr.themes.sizes.text_lg)) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  gr.Markdown("<h1>💧 DripAI: Rate Your Fit 💧</h1>")
 
330
  with gr.Row():
331
+ with gr.Column(scale=1, min_width=350):
332
  input_image = gr.Image(
333
  type='pil',
334
+ label="Upload Your Outfit",
335
  sources=['upload', 'webcam', 'clipboard'],
336
+ height=450
337
  )
338
  analyze_button = gr.Button(
339
  "Analyze Outfit",
340
  variant="primary",
 
341
  )
342
+ with gr.Column(scale=1, min_width=350):
343
+ gr.Markdown("### ANALYSIS RESULTS")
344
+ category_html = gr.HTML(label="Rating & Score")
 
345
  response_box = gr.Textbox(
346
  lines=3,
347
+ label="Verbal Feedback",
348
  interactive=False
349
  )
350
  audio_output = gr.Audio(
351
+ autoplay=True, # Autoplay the generated audio feedback as soon as it is ready
352
  label="Audio Feedback",
353
+ streaming=False # Keep streaming off for pre-recorded TTS
354
  )
355
 
 
356
  analyze_button.click(
357
  fn=analyze_outfit,
358
  inputs=[input_image],
359
  outputs=[category_html, audio_output, response_box]
360
  )
361
+ gr.Markdown("<p>Upload, paste, or use your webcam to capture your outfit. DripAI evaluates your style using multiple AI models.</p>")
 
 
362
 
363
  # --- Launch App ---
364
  if __name__ == "__main__":
365
+ # Make sure 'best.pt' is in the same directory or provide the full path
366
+ if not os.path.exists(YOLO_FASHION_MODEL_PATH):
367
+ print(f"\n{'='*20} WARNING {'='*20}")
368
+ print(f"Fashion model file '{YOLO_FASHION_MODEL_PATH}' not found!")
369
+ print(f"The app will run but fashion item detection will be skipped.")
370
+ print(f"{'='*50}\n")
371
+ # Optionally, you could disable the fashion model part entirely here
372
+ # or raise an error if it's critical.
373
+
374
+ demo.launch(debug=True) # Set debug=False for deployment