"""Gradio app that assesses a device video with Gemini and DINO heatmaps.

The analysis runs in two stages and streams intermediate results to the UI:

1. Gemini classifies the device shown in the video and rates its condition.
2. DINO produces overlay images for the gallery.
"""

import json
import os
import time

import gradio as gr
from dotenv import load_dotenv
from google import genai
from google.genai import types

# Import the optimized DINO functions
from dino_processor import load_dino_model, process_video_with_dino

# --- Configuration and Initialization ---
load_dotenv()

try:
    GEMINI_CLIENT = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    raise gr.Error(
        "FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets."
    )

DINO_MODEL, DINO_DEVICE = load_dino_model()

GEMINI_MODEL_NAME = "gemini-1.5-flash-latest"  # Using a reliable public model
GEMINI_POLL_INTERVAL_S = 5
# Upper bound on how long we wait for an uploaded file to leave "PROCESSING";
# without it a stuck upload would block the request forever.
GEMINI_PROCESSING_TIMEOUT_S = 600


def _wait_until_active(uploaded_file):
    """Poll an uploaded Gemini file until it is no longer processing.

    Args:
        uploaded_file: The file object returned by ``GEMINI_CLIENT.files.upload``.

    Returns:
        The most recently fetched file object, whose state is ``ACTIVE``.

    Raises:
        TimeoutError: If the file is still ``PROCESSING`` after
            ``GEMINI_PROCESSING_TIMEOUT_S`` seconds.
        RuntimeError: If processing ends in any state other than ``ACTIVE``.
    """
    deadline = time.monotonic() + GEMINI_PROCESSING_TIMEOUT_S
    while uploaded_file.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Gemini file processing timed out after {GEMINI_PROCESSING_TIMEOUT_S}s"
            )
        time.sleep(GEMINI_POLL_INTERVAL_S)
        uploaded_file = GEMINI_CLIENT.files.get(name=uploaded_file.name)

    if uploaded_file.state.name != "ACTIVE":
        raise RuntimeError(
            f"File processing failed for Gemini. State: {uploaded_file.state.name}"
        )
    return uploaded_file


# --- Main Gradio Generator Function ---
def full_analysis(video_file_path, user_observations):
    """Run the Gemini and DINO analyses, streaming results to the UI.

    This is a generator: every yielded value is a 4-tuple
    ``(device_type, condition, reason, overlay_images)`` matching the four
    output components wired up in ``submit_button.click``.

    Args:
        video_file_path: Path of the uploaded video; a falsy value yields a
            single "please upload" message and stops.
        user_observations: Optional free-text notes that are embedded in the
            Gemini prompt.

    Yields:
        A placeholder tuple while Gemini runs, the Gemini result (or an error
        message in the first field), and finally the same text fields together
        with the DINO overlay images (``None`` if the DINO stage failed).
    """
    if not video_file_path:
        yield "Please upload a video first.", "", "", None
        return

    # Stage 1: Gemini Analysis
    print("--- Starting Device Analysis ---")
    device_type, condition, reason = "Processing...", "Processing...", "Processing..."
    yield device_type, condition, reason, None

    uploaded_file = None
    try:
        uploaded_file = GEMINI_CLIENT.files.upload(file=video_file_path)
        uploaded_file = _wait_until_active(uploaded_file)

        observations = user_observations or "None"
        prompt = f"""
        Analyze the provided video and user observations. Respond ONLY with a valid JSON object.
        User Observations: "{observations}"
        Logic:
        1. Is the object an electronic device or white good?
        2. If YES: respond with JSON keys "device_type", "condition" (from "unusable", "poor", "functional", "good", "excellent"), and "reason".
        3. If NO: respond with JSON key "device_type" starting with "Invalid: ".
        """
        config = types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type="application/json",
        )
        response = GEMINI_CLIENT.models.generate_content(
            model=f"models/{GEMINI_MODEL_NAME}",
            contents=[uploaded_file, prompt],
            config=config,
        )

        parsed_json = json.loads(response.text)
        if not isinstance(parsed_json, dict):
            # A JSON array/scalar would otherwise fail with an opaque
            # AttributeError on ``.get``.
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(parsed_json).__name__}"
            )

        # Coerce to str so the substring check below cannot raise on a
        # non-string value (e.g. JSON null).
        device_type = str(parsed_json.get("device_type", "N/A"))
        if "Invalid:" in device_type:
            condition, reason = "N/A", "Object is not a valid device."
        else:
            condition = parsed_json.get("condition", "N/A")
            reason = parsed_json.get("reason", "N/A")
        print("--- Device Analysis Complete ---")
    except Exception as e:
        # Top-level boundary for the Gemini stage: report the failure in the
        # UI instead of aborting, so the DINO stage still runs.
        print(f"ERROR during Gemini processing: {e}")
        device_type, condition, reason = f"Error in Gemini processing: {e}", "", ""
    finally:
        if uploaded_file is not None:
            try:
                GEMINI_CLIENT.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                # Cleanup is best-effort; a failed delete must not discard
                # the analysis result or skip the DINO stage.
                print(f"WARNING: could not delete uploaded Gemini file: {cleanup_error}")

    yield device_type, condition, reason, None

    # Stage 2: DINO Analysis
    print("--- Starting DINO Analysis ---")
    try:
        # Per the original note, this returns a list of overlay image paths.
        overlay_images = process_video_with_dino(video_file_path, DINO_MODEL, DINO_DEVICE)
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        reason += f" | DINO Error: {e}"
        overlay_images = None

    # Final yield carries the same four items as every earlier one.
    yield device_type, condition, reason, overlay_images


# --- Gradio Interface (Simplified) ---
theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="emerald",
).set(
    body_background_fill="#E0F2F1",
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)

with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# 🧊 KIKERP: AI Assessment Tool")
    gr.Markdown(
        "Upload a video of a device or white good for a complete visual and AI-driven analysis."
    )
    with gr.Row():
        with gr.Column(scale=1):
            video_input = gr.Video(
                label="Upload or Record Video",
                sources=["upload", "webcam"],
                format="mp4",
            )
            observations_input = gr.Textbox(
                label="Optional: User Observations",
                placeholder="e.g., Device overheats...",
            )
            submit_button = gr.Button("Run Full Analysis", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("## Device Condition Analysis")
            with gr.Row():
                device_type_output = gr.Textbox(label="Device Type", interactive=False)
                condition_output = gr.Textbox(label="Condition", interactive=False)
            # NOTE(review): layout nesting was reconstructed from a
            # whitespace-collapsed source; confirm this box belongs below the
            # row rather than inside it.
            reason_output = gr.Textbox(label="Reason / Details", interactive=False)
            gr.Markdown("---")
            # Simplified DINO output section: a single gallery of overlays.
            gr.Markdown("## Overlayed Heatmaps on Key Frames")
            overlay_gallery = gr.Gallery(
                label="Overlayed Heatmaps",
                elem_id="gallery",
                columns=3,
            )

    submit_button.click(
        fn=full_analysis,
        inputs=[video_input, observations_input],
        # Must stay in sync with the 4-tuples yielded by full_analysis.
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
        ],
        show_progress='full',
    )

demo.launch(debug=True)