Spaces:

FraunhoferIPK
/

KIKERP_Demo

Sleeping

File size: 4,542 Bytes

afb9dad
 
2955ae6
a64527c
afb9dad
 
5a474ac
17611a2
5a474ac
a64527c
 
 
5a474ac
afb9dad
4f37c46
5a474ac
afb9dad
9c2cb20
afb9dad
a64527c
 
afb9dad
a64527c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c2cb20
a64527c
 
17611a2
9c2cb20
a64527c
17611a2
7135735
 
 
 
a64527c
7135735
9c2cb20
641b9cd
2955ae6
641b9cd
 
9c2cb20
a64527c
 
ca5f4b2
 
a64527c
ca5f4b2
9c2cb20
 
 
a64527c
 
9c2cb20
a64527c
 
ca5f4b2
 
 
 
a64527c
 
 
2955ae6
afb9dad
a64527c
 
 
2955ae6
a64527c
 
 
9c2cb20
 
 
 
a64527c
 
 
 
 
 
 
9c2cb20
afb9dad
a64527c
afb9dad
a64527c
 
 
 
 
 
 
9c2cb20
afb9dad
 
9c2cb20

import gradio as gr
import os
import json
import time
from dotenv import load_dotenv

from google import genai
from google.genai import types

# Import our new DINO processing function
from dino_processor import process_video_with_dino

# --- Configuration and Client Initialization ---
# load_dotenv() runs before the lookup below so that a key supplied through a
# .env file (python-dotenv) is already present in os.environ.
load_dotenv()

try:
    _gemini_api_key = os.environ["GEMINI_API_KEY"]
    # Single module-level Gemini client shared by every request.
    client = genai.Client(api_key=_gemini_api_key)
except KeyError:
    # No Gemini call can succeed without a key, so abort at import time.
    raise gr.Error(
        "FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets."
    )

# --- Main Gradio Function ---
def full_analysis(video_file_path):
    """Run the DINO and Gemini analyses on one video for the Gradio UI.

    The two stages run sequentially: DINO first, then Gemini. The file
    uploaded to Gemini is removed again once the stage is over, whether it
    succeeded or not.

    Args:
        video_file_path: Path of the video to analyse. A falsy value (no
            video supplied) returns a prompt message without doing any work.

    Returns:
        A 5-tuple ``(device_type, condition, reason, overlay_images,
        attention_maps)`` matching the five output components of the UI.

        * No video: ``("Please upload a video first.", "", "", None, None)``.
        * DINO failure: the error text in the first slot, empty strings and
          ``None`` galleries; Gemini is not called.
        * Gemini failure: the error text in the first slot, empty strings for
          condition/reason, and the DINO image lists still returned.
    """
    if not video_file_path:
        # Return empty values for all components
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        # Expected to yield pairs: [(overlay1, attn1), (overlay2, attn2), ...]
        dino_results = process_video_with_dino(video_file_path)
        overlay_images = [res[0] for res in dino_results]
        attention_maps = [res[1] for res in dino_results]
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        # Return an error message and empty galleries
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    poll_interval_s = 5
    # Upper bound on the wait for Gemini to process the upload, so a file that
    # never leaves PROCESSING cannot block the request indefinitely.
    max_wait_s = 600
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        deadline = time.monotonic() + max_wait_s
        # Poll until the uploaded file leaves the PROCESSING state.
        while uploaded_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Gemini did not finish processing the upload within {max_wait_s} seconds."
                )
            time.sleep(poll_interval_s)
            uploaded_file = client.files.get(name=uploaded_file.name)
        if uploaded_file.state.name != "ACTIVE":
            raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")

        prompt = """
        Analyze the provided video. Respond ONLY with a valid JSON object with three keys:
        1. "device_type": A short string identifying the device.
        2. "condition": A single word: "Mint", "Excellent", "Good", "Fair", or "Poor".
        3. "reason": A brief string explaining the condition.
        """
        # NOTE(review): confirm this model id is still served by the API.
        model_name = "gemini-1.5-flash-latest"
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, prompt]

        response = client.models.generate_content(model=f"models/{model_name}", contents=contents, config=config)
        parsed_json = json.loads(response.text)
        if not isinstance(parsed_json, dict):
            # Valid JSON that is not an object (e.g. a list) has no keys to
            # read; report that clearly instead of failing on ``.get``.
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(parsed_json).__name__}."
            )
        device_type = parsed_json.get("device_type", "N/A")
        condition = parsed_json.get("condition", "N/A")
        reason = parsed_json.get("reason", "N/A")
        print("--- Gemini Analysis Complete ---")

    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        device_type, condition, reason = f"Error in Gemini processing: {e}", "", ""
    finally:
        if uploaded_file is not None:
            # Cleanup is best-effort: a failed delete must not discard the
            # results that were already computed above.
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as e:
                print(f"WARNING: could not delete uploaded Gemini file: {e}")

    # Return all the results to the Gradio UI
    return device_type, condition, reason, overlay_images, attention_maps

# --- Gradio Interface ---
# Layout: video input and submit button on top, then the three Gemini text
# fields in one row, then the two DINO image galleries.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📱 Advanced Device Condition Analyzer")
    gr.Markdown("Upload a video to get a condition analysis from Gemini and attention maps from DINO.")

    video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
    submit_button = gr.Button("Run Full Analysis", variant="primary")

    gr.Markdown("## Gemini Condition Analysis")
    with gr.Row():
        device_type_output = gr.Textbox(label="Device Type")
        condition_output = gr.Textbox(label="Condition")
        reason_output = gr.Textbox(label="Reason / Details")

    gr.Markdown("## DINO: Overlayed Heatmaps on Representative Frames")
    # Use gr.Gallery to display multiple images.
    # Each gallery gets its own elem_id: the value becomes the component's
    # HTML id, and the two galleries previously shared the id "gallery".
    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="overlay_gallery")

    gr.Markdown("## DINO: Self-Attention Maps")
    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="attention_gallery")

    # The order of `outputs` must match the 5-tuple returned by full_analysis:
    # (device_type, condition, reason, overlay_images, attention_maps).
    submit_button.click(
        fn=full_analysis,
        inputs=video_input,
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery,
        ],
        show_progress='full',
    )

demo.launch(debug=True)