Spaces:

dzmu
/

DripAI2Test

Running

App Files Community

dzmu committed on Apr 2, 2025

Commit

bade9fb

verified ·

1 Parent(s): 56b01f7

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -96

app.py CHANGED Viewed

@@ -8,73 +8,35 @@ from PIL import Image
 from ultralytics import YOLO
 from gtts import gTTS
 import uuid
-import time
 import tempfile
 device = "cuda" if torch.cuda.is_available() else "cpu"
 clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
 yolo_model = YOLO('yolov8n.pt').to(device)
-fashion_model = YOLO('best.pt').to(device)  # If needed
 style_prompts = {
-    'drippy': [
-        "avant-garde streetwear",
-        "high-fashion designer outfit",
-        "trendsetting urban attire",
-        "luxury sneakers and chic accessories",
-        "cutting-edge, bold style"
-    ],
-    'mid': [
-        "casual everyday outfit",
-        "modern minimalistic attire",
-        "comfortable yet stylish look",
-        "simple, relaxed streetwear",
-        "balanced, practical fashion"
-    ],
-    'not_drippy': [
-        "disheveled outfit",
-        "poorly coordinated fashion",
-        "unfashionable, outdated attire",
-        "tacky, mismatched ensemble",
-        "sloppy, uninspired look"
-    ]
 }
-clothing_prompts = [
-    "t-shirt", "dress shirt", "blouse", "hoodie", "jacket", "sweater", "coat",
-    "dress", "skirt", "pants", "jeans", "trousers", "shorts",
-    "sneakers", "boots", "heels", "sandals",
-    "cap", "hat", "scarf", "gloves", "bag", "accessory", "tank-top", "haircut"
-]
 response_templates = {
-    'drippy': [
-        "You're Drippy, bruh – fire {item}!",
-        "{item} goes crazy, on god!",
-        "Certified drippy with that {item}."
-    ],
-    'mid': [
-        "Drop the {item} and you might get a text back.",
-        "It's alright, but I'd upgrade the {item}.",
-        "Mid fit alert. That {item} is holding you back."
-    ],
-    'not_drippy': [
-        "Bro thought that {item} was tuff!",
-        "Oh hell nah! Burn that {item}!",
-        "Crimes against fashion, especially that {item}! Also… maybe get a haircut.",
-        "Never walk out the house again with that {item}."
-    ]
 }
-# Map "not_drippy" => "trash" in user-facing output
 CATEGORY_LABEL_MAP = {
     "drippy": "drippy",
     "mid": "mid",
     "not_drippy": "trash"
 }
-# Combine all prompts for CLIP
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
@@ -86,13 +48,11 @@ def get_top_clothing(probs, n=3):
     return [clothing_prompts[i] for i in reversed(top_indices)]
 def analyze_outfit(img: Image.Image):
-    # 1) YOLO detection
     results = yolo_model(img)
     boxes = results[0].boxes.xyxy.cpu().numpy()
     classes = results[0].boxes.cls.cpu().numpy()
     confidences = results[0].boxes.conf.cpu().numpy()
-    # Crop if person is found
     person_indices = np.where(classes == 0)[0]
     cropped_img = img
     if len(person_indices) > 0:
@@ -100,73 +60,78 @@ def analyze_outfit(img: Image.Image):
         x1, y1, x2, y2 = map(int, boxes[person_indices][max_conf_idx])
         cropped_img = img.crop((x1, y1, x2, y2))
-    # 2) CLIP analysis
     image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(device)
     text_tokens = clip.tokenize(all_prompts).to(device)
     with torch.no_grad():
         logits, _ = clip_model(image_tensor, text_tokens)
         probs = logits.softmax(dim=-1).cpu().numpy()[0]
-    # Style classification
-    drip_len = len(style_prompts['drippy'])
-    mid_len = len(style_prompts['mid'])
-    not_len = len(style_prompts['not_drippy'])
-    drip_score = np.mean(probs[:drip_len])
-    mid_score = np.mean(probs[drip_len : drip_len + mid_len])
-    not_score = np.mean(probs[drip_len + mid_len : drip_len + mid_len + not_len])
-    if drip_score > mid_score and drip_score > not_score:
-        category_key = 'drippy'
-        final_score = drip_score
-    elif mid_score > not_score:
-        category_key = 'mid'
-        final_score = mid_score
-    else:
-        category_key = 'not_drippy'
-        final_score = not_score
-    category_label = CATEGORY_LABEL_MAP[category_key]
-    # Clothing item
     clothing_items = get_top_clothing(probs)
     clothing_item = clothing_items[0]
-    # Random response
     response = random.choice(response_templates[category_key]).format(item=clothing_item)
-    # TTS MP3
     tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
-    tts = gTTS(response, lang="en")
-    tts.save(tts_path)
-    # Round the score
-    final_score_str = f"{final_score:.2f}"
-    # Output HTML for category + numeric score
     category_html = f"""
-        <h2>Your fit is {category_label}!</h2>
-        <p>Drip Score: {final_score_str}</p>
     """
     return category_html, tts_path, response
-###############################################################################
-# Custom Layout with Blocks
-###############################################################################
-with gr.Blocks(css=".container {max-width: 800px; margin: 0 auto;}") as demo:
-    gr.Markdown("## DripAI")
-    with gr.Group(elem_classes=["container"]):
-        input_image = gr.Image(
-            type='pil',
-            label="Upload your outfit"
-        )
-        analyze_button = gr.Button("Analyze Outfit")
-        # Output components
         category_html = gr.HTML()
-        audio_output = gr.Audio(autoplay=True, label="Audio Feedback")
-        response_box = gr.Textbox(lines=3, label="Response")
         analyze_button.click(
             fn=analyze_outfit,

 from ultralytics import YOLO
 from gtts import gTTS
 import uuid
 import tempfile
+# Device and model loading
 device = "cuda" if torch.cuda.is_available() else "cpu"
 clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
 yolo_model = YOLO('yolov8n.pt').to(device)
+fashion_model = YOLO('best.pt').to(device)
+# Style prompts and templates
 style_prompts = {
+    'drippy': [...],  # truncated for brevity
+    'mid': [...],
+    'not_drippy': [...]
 }
+clothing_prompts = [...]
 response_templates = {
+    'drippy': [...],
+    'mid': [...],
+    'not_drippy': [...]
 }
 CATEGORY_LABEL_MAP = {
     "drippy": "drippy",
     "mid": "mid",
     "not_drippy": "trash"
 }
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
     return [clothing_prompts[i] for i in reversed(top_indices)]
 def analyze_outfit(img: Image.Image):
     results = yolo_model(img)
     boxes = results[0].boxes.xyxy.cpu().numpy()
     classes = results[0].boxes.cls.cpu().numpy()
     confidences = results[0].boxes.conf.cpu().numpy()
     person_indices = np.where(classes == 0)[0]
     cropped_img = img
     if len(person_indices) > 0:
         x1, y1, x2, y2 = map(int, boxes[person_indices][max_conf_idx])
         cropped_img = img.crop((x1, y1, x2, y2))
     image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(device)
     text_tokens = clip.tokenize(all_prompts).to(device)
     with torch.no_grad():
         logits, _ = clip_model(image_tensor, text_tokens)
         probs = logits.softmax(dim=-1).cpu().numpy()[0]
+    drip_score = np.mean(probs[:len(style_prompts['drippy'])])
+    mid_score = np.mean(probs[len(style_prompts['drippy']):len(style_prompts['drippy'])+len(style_prompts['mid'])])
+    not_score = np.mean(probs[len(style_prompts['drippy'])+len(style_prompts['mid']):])
+    category_key = max(['drippy', 'mid', 'not_drippy'], key=lambda k: np.mean(
+        probs[:len(style_prompts[k])] if k == 'drippy' else
+        probs[len(style_prompts['drippy']):len(style_prompts['drippy'])+len(style_prompts['mid'])] if k == 'mid' else
+        probs[len(style_prompts['drippy'])+len(style_prompts['mid']):]
+    ))
+    category_label = CATEGORY_LABEL_MAP[category_key]
     clothing_items = get_top_clothing(probs)
     clothing_item = clothing_items[0]
     response = random.choice(response_templates[category_key]).format(item=clothing_item)
     tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
+    gTTS(response, lang="en").save(tts_path)
+    final_score_str = f"{max(drip_score, mid_score, not_score):.2f}"
     category_html = f"""
+        <div style='text-align: center;'>
+            <h2 style='color: #1f04ff;'>Your fit is <b>{category_label.upper()}</b></h2>
+            <p style='font-size: 18px;'>Drip Score: <strong>{final_score_str}</strong></p>
+        </div>
     """
     return category_html, tts_path, response
+# Gradio interface with cleaner styling
+custom_css = """
+.container {
+    max-width: 700px;
+    margin: 0 auto;
+    font-family: 'Arial', sans-serif;
+}
+button {
+    background-color: #1f04ff;
+    color: white;
+    border-radius: 6px;
+    padding: 10px 20px;
+    font-size: 16px;
+}
+button:hover {
+    background-color: #3c2fff;
+}
+.gradio-container {
+    background: #f9f9f9;
+    border-radius: 10px;
+    padding: 20px;
+    box-shadow: 0 4px 10px rgba(0,0,0,0.1);
+}
+"""
+with gr.Blocks(css=custom_css) as demo:
+    with gr.Column(elem_classes=["container"]):
+        gr.Markdown("""
+            # 👟 DripAI
+            Upload your outfit to get judged by the algorithm.
+            No bias. No mercy. Just drip.
+        """)
+        input_image = gr.Image(type='pil', label="Upload your outfit")
+        analyze_button = gr.Button("Analyze My Fit")
         category_html = gr.HTML()
+        audio_output = gr.Audio(autoplay=True, label="AI Feedback")
+        response_box = gr.Textbox(lines=2, label="Generated Response")
         analyze_button.click(
             fn=analyze_outfit,