Spaces:
Sleeping
Sleeping
File size: 6,886 Bytes
afb9dad 2955ae6 a64527c afb9dad 5a474ac 17611a2 5a474ac e69763d a64527c 5a474ac afb9dad 4f37c46 5a474ac afb9dad 9c2cb20 afb9dad a64527c e69763d afb9dad a64527c e69763d a64527c 9c2cb20 a64527c 17611a2 9c2cb20 17611a2 7135735 a64527c 7135735 e69763d 9c2cb20 e69763d a64527c ca5f4b2 a64527c ca5f4b2 e69763d 9c2cb20 e69763d a64527c 9c2cb20 a64527c ca5f4b2 a64527c 2955ae6 e69763d a64527c 9c2cb20 e69763d a64527c e69763d a64527c e69763d 9c2cb20 afb9dad a64527c e69763d a64527c 9c2cb20 afb9dad 9c2cb20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import gradio as gr
import os
import json
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types
# Import our DINO processing function
from dino_processor import process_video_with_dino
# --- Configuration and Client Initialization ---
load_dotenv()

# Fail fast with a visible Gradio error if the API key secret is missing.
_api_key = os.environ.get("GEMINI_API_KEY")
if _api_key is None:
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
client = genai.Client(api_key=_api_key)
# --- Main Gradio Function ---

# Seconds between polls of the Gemini Files API while the upload is processed.
_GEMINI_POLL_INTERVAL_S = 5
# Give up waiting for Gemini file processing after this long instead of
# hanging the request forever (the original loop had no timeout).
_GEMINI_POLL_TIMEOUT_S = 300


def _run_dino(video_file_path):
    """Run the DINO pass and split its per-frame results.

    Returns (overlay_images, attention_maps), two parallel lists built from
    the (overlay, attention) pairs produced by process_video_with_dino.
    Exceptions propagate to the caller.
    """
    dino_results = process_video_with_dino(video_file_path)
    overlay_images = [overlay for overlay, _ in dino_results]
    attention_maps = [attention for _, attention in dino_results]
    return overlay_images, attention_maps


def _build_prompt(user_observations):
    """Return the JSON-only assessment prompt, embedding optional user notes."""
    return f"""
Analyze the provided video and the user's observations. Respond ONLY with a valid JSON object.
User Observations: "{user_observations if user_observations else 'None'}"
Follow these steps:
1. First, identify the primary object in the video. Is it an electronic device or a white good (e.g., refrigerator, washing machine, microwave)?
2. If it IS an electronic device or white good, respond with a JSON object with these three keys:
- "device_type": A short string identifying the specific device (e.g., "iPhone 13 Pro", "Samsung Washing Machine").
- "condition": A single word describing its visual condition. Choose ONLY from: "unusable", "poor", "functional", "good", "excellent".
- "reason": A very brief, concise string explaining the condition (e.g., "heavy screen cracks", "minor scratches on body", "clean with no visible marks").
3. If it is NOT an electronic device or white good, respond with a JSON object with only ONE key:
- "device_type": A string starting with "Invalid: " followed by the object's category (e.g., "Invalid: Water Bottle", "Invalid: Shoe").
Example for a valid device:
{{
"device_type": "Dell XPS Laptop",
"condition": "functional",
"reason": "Visible scratches and paint erosion on the lid."
}}
Example for an invalid object:
{{
"device_type": "Invalid: Coffee Mug"
}}
"""


def _wait_until_active(uploaded_file):
    """Poll the Files API until the uploaded file leaves PROCESSING.

    Returns the refreshed file object once ACTIVE. Raises if processing
    fails, or if it does not finish within _GEMINI_POLL_TIMEOUT_S seconds.
    """
    deadline = time.monotonic() + _GEMINI_POLL_TIMEOUT_S
    while uploaded_file.state.name == "PROCESSING":
        if time.monotonic() > deadline:
            raise Exception("Timed out waiting for Gemini to process the uploaded video.")
        time.sleep(_GEMINI_POLL_INTERVAL_S)
        uploaded_file = client.files.get(name=uploaded_file.name)
    if uploaded_file.state.name != "ACTIVE":
        raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
    return uploaded_file


def _interpret_assessment(parsed_json):
    """Map the model's JSON object to (device_type, condition, reason) strings.

    An "Invalid: ..." device_type means the object was not an electronic
    device or white good, so the condition fields do not apply.
    """
    device_type = parsed_json.get("device_type", "N/A")
    if "Invalid:" in device_type:
        return device_type, "N/A", "Object is not a valid electronic device or white good."
    return device_type, parsed_json.get("condition", "N/A"), parsed_json.get("reason", "N/A")


def _run_gemini(video_file_path, user_observations):
    """Upload the video to Gemini and request a JSON condition assessment.

    Returns (device_type, condition, reason). On any failure the first
    element carries the error message and the other two are empty strings.
    The uploaded file is always deleted from the Files API afterwards
    (best-effort: a failed delete never masks the analysis result).
    """
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        uploaded_file = _wait_until_active(uploaded_file)
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, _build_prompt(user_observations)]
        response = client.models.generate_content(
            model="models/gemini-1.5-flash-latest", contents=contents, config=config
        )
        parsed_json = json.loads(response.text)
        device_type, condition, reason = _interpret_assessment(parsed_json)
        print("--- Gemini Analysis Complete ---")
        return device_type, condition, reason
    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        return f"Error in Gemini processing: {e}", "", ""
    finally:
        if uploaded_file:
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                # Deleting a PROCESSING/failed file can itself raise; log and move on.
                print(f"WARNING: could not delete uploaded Gemini file: {cleanup_error}")


def full_analysis(video_file_path, user_observations):
    """Run the full assessment pipeline for the Gradio UI.

    The DINO visual pass runs first, then the Gemini condition assessment
    (sequentially — not in parallel, despite what an earlier comment said).

    Args:
        video_file_path: Local path of the uploaded/recorded video, or None.
        user_observations: Optional free-text notes from the user.

    Returns:
        A 5-tuple matching the output components:
        (device_type, condition, reason, overlay_images, attention_maps).
    """
    if not video_file_path:
        # Return empty values for all output components.
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        overlay_images, attention_maps = _run_dino(video_file_path)
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    device_type, condition, reason = _run_gemini(video_file_path, user_observations)

    # Return all the results to the Gradio UI.
    return device_type, condition, reason, overlay_images, attention_maps
# --- Gradio Interface ---
# Define a custom theme
# Green-tinted Soft theme: white content cards on a pale aquatic background,
# with solid primary-colored buttons.
theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="emerald",
).set(
    body_background_fill="#E0F2F1",  # A light aquatic green
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# UI layout: left column takes the inputs, right column shows the Gemini
# text results above the two DINO image galleries.
with gr.Blocks(theme=theme) as demo:
    # Using a refrigerator emoji for white goods π§ or π
    gr.Markdown("# π§ KIKERP: AI Assessment Tool")
    gr.Markdown("Upload a video of a device or white good for a complete visual and AI-driven analysis.")
    with gr.Row():
        with gr.Column(scale=1):
            # Video may be uploaded as a file or recorded from the webcam.
            video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
            # NEW: Optional text input for user observations
            observations_input = gr.Textbox(
                label="Optional: User Observations",
                placeholder="e.g., Device overheats, screen flickers, makes a strange noise..."
            )
            submit_button = gr.Button("Run Full Analysis", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("## Gemini Condition Analysis")
            with gr.Row():
                device_type_output = gr.Textbox(label="Device Type")
                condition_output = gr.Textbox(label="Condition")
                reason_output = gr.Textbox(label="Reason / Details")
            gr.Markdown("---")  # Adds a horizontal line for separation
            gr.Markdown("## DINO: Visual Feature Analysis")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Overlayed Heatmaps on Key Frames")
                    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery", columns=5)
                with gr.Column():
                    gr.Markdown("### Self-Attention Maps")
                    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery", columns=5)
    # Wire the button to full_analysis; output order must match its 5-tuple.
    submit_button.click(
        fn=full_analysis,
        # Add the new observations_input to the inputs list
        inputs=[video_input, observations_input],
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery
        ],
        show_progress='full'
    )

demo.launch(debug=True)