Spaces:

dzmu
/

DripAI2Test

Running

App Files Community

dzmu committed on Apr 2, 2025

Commit

76d5e1f

verified ·

1 Parent(s): aad2489

Update app.py

Browse files

Files changed (1) hide show

app.py +215 -60

app.py CHANGED Viewed

@@ -25,7 +25,6 @@ try:
 except Exception as e:
     print(f"Error loading CLIP model: {e}")
     # Handle error
 try:
     yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH).to(DEVICE)
     print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
@@ -34,13 +33,6 @@ except Exception as e:
     # Handle error
 # REMOVED Fashion Model Loading
-# try:
-#     fashion_model = YOLO(YOLO_FASHION_MODEL_PATH).to(DEVICE)
-#     print(f"YOLO fashion model ({YOLO_FASHION_MODEL_PATH}) loaded successfully.")
-#     if not hasattr(fashion_model, 'names') or not fashion_model.names:
-#          print("Warning: Fashion model names not found.")
-# except Exception as e:
-#     print(f"Error loading YOLO fashion model: {e}")
 # --- Prompts and Responses ---
 style_prompts = {
@@ -70,6 +62,7 @@ clothing_prompts = [
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
 # Record end of style prompts before adding clothing prompts
 style_prompts_end_index = len(all_prompts)
 all_prompts.extend(clothing_prompts)
@@ -89,41 +82,31 @@ response_templates = {
         "Never walk out the house again with that {item}."
     ]
 }
 CATEGORY_LABEL_MAP = { "drippy": "drippy", "mid": "mid", "not_drippy": "trash" }
 # --- REINSTATED: Function to get top clothing items based on CLIP probabilities ---
 def get_top_clothing(probs, n=3):
     """Gets the top N clothing items based on CLIP probabilities."""
-    # Calculate the start index of clothing probabilities in the combined 'probs' array
     clothing_probs_start_index = style_prompts_end_index
     clothing_probs = probs[clothing_probs_start_index:]
-    # Ensure we don't request more items than available prompts
     actual_n = min(n, len(clothing_prompts))
     if actual_n <= 0:
-        return ["item"] # Return default if no clothing prompts
-    # Get indices of top N probabilities within the clothing_probs slice
     top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
-    # Return the corresponding clothing prompt names in descending order of probability
     return [clothing_prompts[i] for i in reversed(top_indices_in_slice)]
 # --- Core Logic ---
 def analyze_outfit(input_img: Image.Image):
     if input_img is None:
-        return "Please upload an image.", None, "Error: No image provided."
     img = input_img.copy()
-    # 1) YOLO Person Detection (Same as before)
     person_results = yolo_person_model(img, verbose=False)
     boxes = person_results[0].boxes.xyxy.cpu().numpy()
     classes = person_results[0].boxes.cls.cpu().numpy()
     confidences = person_results[0].boxes.conf.cpu().numpy()
     person_indices = np.where(classes == 0)[0]
     cropped_img = img
     if len(person_indices) > 0:
@@ -139,32 +122,23 @@ def analyze_outfit(input_img: Image.Image):
             cropped_img = img
     else:
         print("No person detected by yolo_person_model. Analyzing full image.")
-        # Decide if you want to proceed or return an error
-    # --- REMOVED: YOLO Fashion Detection ---
-    # 2) CLIP Analysis (Using ALL prompts - Style + Clothing)
-    detected_clothing_item = "look" # Default if something goes wrong
     try:
         image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
-        # --- Use all_prompts for tokenization ---
         text_tokens = clip.tokenize(all_prompts).to(DEVICE)
         with torch.no_grad():
             logits, _ = clip_model(image_tensor, text_tokens)
-            # --- Probabilities for ALL prompts ---
             all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
-        # Calculate average scores for each style category based on their slices in all_probs
         drip_len = len(style_prompts['drippy'])
         mid_len = len(style_prompts['mid'])
-        # not_len = len(style_prompts['not_drippy']) # Calculated implicitly below
         drip_score = np.mean(all_probs[0 : drip_len])
         mid_score = np.mean(all_probs[drip_len : drip_len + mid_len])
-        not_score = np.mean(all_probs[drip_len + mid_len : style_prompts_end_index]) # Scores up to end of style prompts
-        # Determine the category based on highest average score
         if drip_score > mid_score and drip_score > not_score:
             category_key = 'drippy'
             final_score = drip_score
@@ -179,63 +153,244 @@ def analyze_outfit(input_img: Image.Image):
         final_score_str = f"{final_score:.2f}"
         print(f"Style analysis: Category={category_label}, Score={final_score_str}")
-        # --- REINSTATED: Get clothing item using CLIP probs ---
-        clothing_items_detected_by_clip = get_top_clothing(all_probs, n=1) # Get top 1 item
         if clothing_items_detected_by_clip:
              detected_clothing_item = clothing_items_detected_by_clip[0]
              print(f"Top clothing item identified by CLIP: {detected_clothing_item}")
         else:
              print("Warning: CLIP did not identify a top clothing item.")
-             detected_clothing_item = "fit" # Fallback if get_top_clothing fails
     except Exception as e:
         print(f"Error during CLIP analysis or clothing selection: {e}")
-        return "Error during analysis.", None, f"Analysis Error: {e}"
-    # 3) Generate Response and TTS (Same as before, but uses item from CLIP)
     try:
         response_text = random.choice(response_templates[category_key]).format(item=detected_clothing_item)
         tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
         tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
         tts.save(tts_path)
         print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
         category_html = f"""
-            <div style='text-align: center; padding: 15px; border: 1px solid #eee; border-radius: 8px;'>
-                <h2 style='color: #333; margin-bottom: 5px;'>Your fit is {category_label.upper()}!</h2>
-                <p style='font-size: 1.1em; color: #555; margin-top: 0;'>Style Score: {final_score_str}</p>
             </div>
         """
         return category_html, tts_path, response_text
     except Exception as e:
         print(f"Error during response/TTS generation: {e}")
-        category_html = f"<h2>Result: {category_label} (Score: {final_score_str})</h2>"
         return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
-# --- Gradio Interface (Unchanged) ---
-with gr.Blocks(css=".gradio-container { max-width: 800px !important; margin: auto !important; } footer { display: none !important; }") as demo:
-    gr.Markdown("<h1 style='text-align: center; margin-bottom: 20px;'>💧 DripAI: Rate Your Fit 💧</h1>")
     with gr.Row():
-        with gr.Column(scale=1):
             input_image = gr.Image(
-                type='pil', label="Upload, Paste, or Use Webcam for your Outfit Photo",
-                sources=['upload', 'webcam', 'clipboard'], height=400
             )
-            analyze_button = gr.Button("Analyze Outfit", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            gr.Markdown("### Analysis Result:")
-            category_html = gr.HTML(label="Category & Score")
-            audio_output = gr.Audio(autoplay=True, label="Audio Feedback", streaming=False)
-            response_box = gr.Textbox(lines=4, label="Text Feedback", interactive=False)
     analyze_button.click(
-        fn=analyze_outfit, inputs=[input_image], outputs=[category_html, audio_output, response_box]
     )
-    gr.Markdown("<p style='text-align: center; color: grey; font-size: 0.9em;'>Upload an image of your outfit and click 'Analyze Outfit'. DripAI will rate your style and identify a key clothing item.</p>")
 # --- Launch App ---
 if __name__ == "__main__":
-    demo.launch(debug=True) # Assumes debug is helpful on HF too, might remove later

 except Exception as e:
     print(f"Error loading CLIP model: {e}")
     # Handle error
 try:
     yolo_person_model = YOLO(YOLO_PERSON_MODEL_PATH).to(DEVICE)
     print(f"YOLO person detection model ({YOLO_PERSON_MODEL_PATH}) loaded successfully.")
     # Handle error
 # REMOVED Fashion Model Loading
 # --- Prompts and Responses ---
 style_prompts = {
 all_prompts = []
 for cat_prompts in style_prompts.values():
     all_prompts.extend(cat_prompts)
 # Record end of style prompts before adding clothing prompts
 style_prompts_end_index = len(all_prompts)
 all_prompts.extend(clothing_prompts)
         "Never walk out the house again with that {item}."
     ]
 }
 CATEGORY_LABEL_MAP = { "drippy": "drippy", "mid": "mid", "not_drippy": "trash" }
 # --- REINSTATED: Function to get top clothing items based on CLIP probabilities ---
 def get_top_clothing(probs, n=3):
     """Gets the top N clothing items based on CLIP probabilities.

     Args:
         probs: Probabilities for the combined prompt list; only the part
             from ``style_prompts_end_index`` onward is used here.
             NOTE(review): assumed to be a 1-D array aligned with
             ``all_prompts`` -- confirm against the caller.
         n: Maximum number of clothing items to return.

     Returns:
         A list of entries from ``clothing_prompts`` ordered from highest
         to lowest probability, or ``["item"]`` when ``n`` is not positive
         or ``clothing_prompts`` is empty.
     """
     # Clothing prompts are appended after the style prompts, so their
     # probabilities start where the style prompts end.
     clothing_probs_start_index = style_prompts_end_index
     clothing_probs = probs[clothing_probs_start_index:]
     # Never ask for more items than there are clothing prompts.
     actual_n = min(n, len(clothing_prompts))
     if actual_n <= 0:
+        return ["item"]
     # argsort is ascending, so the last actual_n indices are the most probable.
     top_indices_in_slice = np.argsort(clothing_probs)[-actual_n:]
     # Reverse so the most probable item comes first.
     return [clothing_prompts[i] for i in reversed(top_indices_in_slice)]
 # --- Core Logic ---
 def analyze_outfit(input_img: Image.Image):
     if input_img is None:
+        return ("<p style='color: #FF5555; text-align: center;'>Please upload an image.</p>",
+                None, "Error: No image provided.")
     img = input_img.copy()
+    # 1) YOLO Person Detection
     person_results = yolo_person_model(img, verbose=False)
     boxes = person_results[0].boxes.xyxy.cpu().numpy()
     classes = person_results[0].boxes.cls.cpu().numpy()
     confidences = person_results[0].boxes.conf.cpu().numpy()
     person_indices = np.where(classes == 0)[0]
     cropped_img = img
     if len(person_indices) > 0:
             cropped_img = img
     else:
         print("No person detected by yolo_person_model. Analyzing full image.")
+    # 2) CLIP Analysis
+    detected_clothing_item = "look"
     try:
         image_tensor = clip_preprocess(cropped_img).unsqueeze(0).to(DEVICE)
         text_tokens = clip.tokenize(all_prompts).to(DEVICE)
         with torch.no_grad():
             logits, _ = clip_model(image_tensor, text_tokens)
             all_probs = logits.softmax(dim=-1).cpu().numpy()[0]
         drip_len = len(style_prompts['drippy'])
         mid_len = len(style_prompts['mid'])
         drip_score = np.mean(all_probs[0 : drip_len])
         mid_score = np.mean(all_probs[drip_len : drip_len + mid_len])
+        not_score = np.mean(all_probs[drip_len + mid_len : style_prompts_end_index])
         if drip_score > mid_score and drip_score > not_score:
             category_key = 'drippy'
             final_score = drip_score
         final_score_str = f"{final_score:.2f}"
         print(f"Style analysis: Category={category_label}, Score={final_score_str}")
+        clothing_items_detected_by_clip = get_top_clothing(all_probs, n=1)
         if clothing_items_detected_by_clip:
              detected_clothing_item = clothing_items_detected_by_clip[0]
              print(f"Top clothing item identified by CLIP: {detected_clothing_item}")
         else:
              print("Warning: CLIP did not identify a top clothing item.")
+             detected_clothing_item = "fit"
     except Exception as e:
         print(f"Error during CLIP analysis or clothing selection: {e}")
+        return ("<p style='color: #FF5555;'>Error during analysis.</p>",
+                None, f"Analysis Error: {e}")
+    # 3) Generate Response and TTS
     try:
         response_text = random.choice(response_templates[category_key]).format(item=detected_clothing_item)
         tts_path = os.path.join(tempfile.gettempdir(), f"drip_{uuid.uuid4().hex}.mp3")
         tts = gTTS(text=response_text, lang='en', tld='com', slow=False)
         tts.save(tts_path)
         print(f"Generated TTS response: '{response_text}' saved to {tts_path}")
+        # --- Updated HTML Output ---
+        # Simpler structure, relies more on CSS for styling defined below
         category_html = f"""
+            <div class='results-container'>
+                <h2 class='result-category'>RATING: {category_label.upper()}</h2>
+                <p class='result-score'>Style Score: {final_score_str}</p>
             </div>
         """
         return category_html, tts_path, response_text
     except Exception as e:
         print(f"Error during response/TTS generation: {e}")
+        category_html = f"""
+            <div class='results-container'>
+                <h2 class='result-category'>Result: {category_label.upper()} (Score: {final_score_str})</h2>
+                <p class='result-score' style='color: #FFAAAA;'>Error generating audio/full response.</p>
+            </div>
+        """
         return category_html, None, f"Analysis complete ({category_label}), but error generating audio/response."
+# --- Elite Fashion / Techno CSS ---
+custom_css = """
+:root {
+    --primary-bg-color: #000000;
+    --secondary-bg-color: #1A1A1A;
+    --text-color: #FFFFFF;
+    --accent-color: #1F04FF;
+    --border-color: #333333; /* Slightly lighter than secondary bg for subtle definition */
+    --input-bg-color: #1A1A1A;
+    --button-text-color: #FFFFFF;
+    --body-text-size: 16px; /* Base text size */
+}
+/* --- Global Styles --- */
+body, .gradio-container {
+    background-color: var(--primary-bg-color) !important;
+    color: var(--text-color) !important;
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; /* Modern font stack */
+    font-size: var(--body-text-size);
+}
+/* Hide default Gradio footer */
+footer { display: none !important; }
+/* --- Component Styling --- */
+.gr-block { /* General block container */
+    background-color: var(--secondary-bg-color) !important;
+    border: 1px solid var(--border-color) !important;
+    border-radius: 8px !important; /* Slightly rounded corners */
+    padding: 15px !important;
+    box-shadow: none !important; /* Remove default shadows */
+}
+/* Input/Output Text Areas & General inputs */
+.gr-input, .gr-output, .gr-textbox textarea, .gr-dropdown select, .gr-checkboxgroup input {
+    background-color: var(--input-bg-color) !important;
+    color: var(--text-color) !important;
+    border: 1px solid var(--border-color) !important;
+    border-radius: 5px !important;
+}
+.gr-textbox textarea::placeholder { /* Style placeholder text if needed */
+    color: #888888 !important;
+}
+/* Component Labels */
+.gr-label span, .gr-label .label-text {
+    color: var(--text-color) !important;
+    font-weight: 500 !important; /* Slightly bolder labels */
+    font-size: 0.95em !important;
+    margin-bottom: 8px !important; /* Space below label */
+}
+/* Image Input/Output */
+.gr-image {
+   background-color: var(--primary-bg-color) !important; /* Match main background */
+   border: 1px dashed var(--border-color) !important; /* Dashed border for drop zone */
+   border-radius: 8px !important;
+   overflow: hidden; /* Ensure image stays within bounds */
+}
+.gr-image img {
+    border-radius: 6px !important; /* Slightly round image corners */
+    object-fit: contain; /* Ensure image fits well */
+}
+.gr-image .no-image, .gr-image .upload-button { /* Placeholder text/button inside image component */
+     color: #AAAAAA !important;
+}
+/* Audio Component */
+.gr-audio > div:first-of-type { /* Target the container around the audio player */
+    border: 1px solid var(--border-color) !important;
+    background-color: var(--secondary-bg-color) !important;
+    border-radius: 5px !important;
+    padding: 10px !important;
+}
+.gr-audio audio { /* Style the audio player itself */
+    width: 100%; /* Make player responsive */
+    filter: invert(1) hue-rotate(180deg); /* Basic dark theme for player controls */
+}
+/* --- Button Styling --- */
+.gr-button { /* General button style reset */
+    border: none !important;
+    border-radius: 5px !important;
+    transition: background-color 0.2s ease, transform 0.1s ease;
+    font-weight: 600 !important;
+}
+.gr-button-primary { /* Specific styling for the primary Analyze button */
+    background-color: var(--accent-color) !important;
+    color: var(--button-text-color) !important;
+    font-size: 1.1em !important; /* Make primary button slightly larger */
+    padding: 12px 20px !important; /* Adjust padding */
+}
+.gr-button-primary:hover {
+    background-color: #482FFF !important; /* Slightly lighter blue on hover */
+    transform: scale(1.02); /* Subtle scale effect */
+    box-shadow: 0 0 10px var(--accent-color); /* Add a glow effect */
+}
+.gr-button-primary:active {
+    transform: scale(0.98); /* Press down effect */
+}
+/* --- Typography & Content --- */
+h1, h2, h3 {
+    color: var(--text-color) !important;
+    font-weight: 600; /* Bold headings */
+    letter-spacing: 0.5px; /* Add slight letter spacing */
+}
+.prose h1 { /* Target Markdown H1 specifically if needed */
+    text-align: center;
+    margin-bottom: 25px !important;
+    font-size: 2em !important; /* Larger title */
+    text-transform: uppercase; /* Uppercase for impact */
+    letter-spacing: 1.5px;
+}
+.prose p { /* Target Markdown Paragraph */
+     color: #CCCCCC !important; /* Slightly dimmer text for descriptions */
+     font-size: 0.95em;
+     text-align: center;
+}
+/* Custom styling for the results HTML block */
+.results-container {
+    text-align: center;
+    padding: 20px;
+    border: 1px solid var(--accent-color); /* Use accent color for border */
+    border-radius: 8px;
+    background: linear-gradient(145deg, var(--secondary-bg-color), #2a2a2a); /* Subtle gradient */
+}
+.result-category {
+    color: var(--accent-color) !important; /* Use accent color for category */
+    font-size: 1.5em;
+    margin-bottom: 5px;
+    font-weight: 700;
+    text-transform: uppercase;
+}
+.result-score {
+    color: var(--text-color) !important;
+    font-size: 1.1em;
+    margin-top: 0;
+}
+/* --- Layout Adjustments --- */
+.gradio-container {
+    max-width: 850px !important; /* Slightly wider max-width */
+    margin: auto !important;
+    padding-top: 30px; /* Add some space at the top */
+}
+.gr-row {
+    gap: 25px !important; /* Increase gap between columns */
+}
+"""
+# --- Gradio Interface (Now using the custom CSS) ---
+with gr.Blocks(css=custom_css, theme=gr.themes.Base(primary_hue="neutral", secondary_hue="neutral", text_size=gr.themes.sizes.text_lg)) as demo: # Use Base theme to minimize default styles
+    # Title using Markdown (styled by CSS)
+    gr.Markdown("<h1>💧 DripAI: Rate Your Fit 💧</h1>")
     with gr.Row():
+        with gr.Column(scale=1, min_width=350): # Assign min width for better responsiveness
             input_image = gr.Image(
+                type='pil',
+                label="Upload Your Outfit", # Simpler label
+                sources=['upload', 'webcam', 'clipboard'],
+                height=450 # Slightly taller image area
+            )
+            analyze_button = gr.Button(
+                "Analyze Outfit",
+                variant="primary",
+                # size="lg" removed, controlled by CSS
+            )
+        with gr.Column(scale=1, min_width=350): # Assign min width
+            gr.Markdown("### ANALYSIS RESULTS") # Simple heading
+            category_html = gr.HTML(label="Rating & Score") # Label for screen readers/context
+            response_box = gr.Textbox(
+                lines=3,
+                label="Verbal Feedback", # Updated label
+                interactive=False
             )
+            audio_output = gr.Audio(
+                autoplay=False, # Changed default to false, user can click play
+                label="Audio Feedback",
+                streaming=False
+            )
+    # Bind the analysis function to the button click
     analyze_button.click(
+        fn=analyze_outfit,
+        inputs=[input_image],
+        outputs=[category_html, audio_output, response_box]
     )
+    # Footer description text
+    gr.Markdown("<p>Upload, paste, or use your webcam to capture your outfit. DripAI evaluates your style.</p>")
 # --- Launch App ---
 if __name__ == "__main__":
+    demo.launch(debug=True)