Spaces:
Sleeping
Sleeping
import gradio as gr
import os
import json
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types

# Import our DINO processing function
from dino_processor import process_video_with_dino

# --- Configuration and Client Initialization ---
load_dotenv()

# Fail fast with a clear message if the API key secret is not configured.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if _gemini_api_key is None:
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
client = genai.Client(api_key=_gemini_api_key)
# --- Main Gradio Function ---
def full_analysis(video_file_path, user_observations):
    """Run the full assessment pipeline on an uploaded video.

    First extracts DINO overlay/attention images from the video, then uploads
    the video to Gemini and requests a JSON condition assessment.

    Args:
        video_file_path: Local path to the video file, or None/"" if missing.
        user_observations: Optional free-text notes from the user (may be "").

    Returns:
        (device_type, condition, reason, overlay_images, attention_maps) —
        the five values bound to the Gradio output components. On error, the
        first element carries the error message.
    """
    if not video_file_path:
        # Return placeholder values for all five output components.
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis (runs first, sequentially; its galleries are still
    # returned even if the later Gemini step fails) ---
    print("--- Starting DINO Analysis ---")
    try:
        dino_results = process_video_with_dino(video_file_path)
        # Each result is an (overlay_image, attention_map) pair.
        overlay_images = [res[0] for res in dino_results]
        attention_maps = [res[1] for res in dino_results]
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        # Poll until the file leaves PROCESSING; cap the wait so a stuck
        # server-side job cannot hang the UI indefinitely.
        deadline = time.time() + 300  # seconds
        while uploaded_file.state.name == "PROCESSING":
            if time.time() > deadline:
                raise TimeoutError("Gemini file processing timed out.")
            time.sleep(5)
            uploaded_file = client.files.get(name=uploaded_file.name)
        if uploaded_file.state.name != "ACTIVE":
            raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
        # --- NEW, MORE ADVANCED PROMPT ---
        prompt = f"""
Analyze the provided video and the user's observations. Respond ONLY with a valid JSON object.
User Observations: "{user_observations if user_observations else 'None'}"
Follow these steps:
1. First, identify the primary object in the video. Is it an electronic device or a white good (e.g., refrigerator, washing machine, microwave)?
2. If it IS an electronic device or white good, respond with a JSON object with these three keys:
- "device_type": A short string identifying the specific device (e.g., "iPhone 13 Pro", "Samsung Washing Machine").
- "condition": A single word describing its visual condition. Choose ONLY from: "unusable", "poor", "functional", "good", "excellent".
- "reason": A very brief, concise string explaining the condition (e.g., "heavy screen cracks", "minor scratches on body", "clean with no visible marks").
3. If it is NOT an electronic device or white good, respond with a JSON object with only ONE key:
- "device_type": A string starting with "Invalid: " followed by the object's category (e.g., "Invalid: Water Bottle", "Invalid: Shoe").
Example for a valid device:
{{
"device_type": "Dell XPS Laptop",
"condition": "functional",
"reason": "Visible scratches and paint erosion on the lid."
}}
Example for an invalid object:
{{
"device_type": "Invalid: Coffee Mug"
}}
"""
        model_name = "gemini-1.5-flash-latest"
        # JSON response mode + low temperature for deterministic, parseable output.
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, prompt]
        response = client.models.generate_content(model=f"models/{model_name}", contents=contents, config=config)
        parsed_json = json.loads(response.text)
        # Distinguish a valid device from the "Invalid: <category>" sentinel.
        device_type = parsed_json.get("device_type", "N/A")
        if "Invalid:" in device_type:
            condition = "N/A"
            reason = "Object is not a valid electronic device or white good."
        else:
            condition = parsed_json.get("condition", "N/A")
            reason = parsed_json.get("reason", "N/A")
        print("--- Gemini Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        device_type, condition, reason = f"Error in Gemini processing: {e}", "", ""
    finally:
        # Best-effort cleanup: a failed delete must not raise out of the
        # finally block and mask the result (or the original error) above.
        if uploaded_file:
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                print(f"WARNING: failed to delete uploaded Gemini file: {cleanup_error}")

    # Return all the results to the Gradio UI
    return device_type, condition, reason, overlay_images, attention_maps
# --- Gradio Interface ---
# Custom look: green/emerald Soft theme over a light aquatic background.
_base_theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="emerald",
)
theme = _base_theme.set(
    body_background_fill="#E0F2F1",  # A light aquatic green
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# Build the Gradio UI: inputs on the left, Gemini text results and DINO
# image galleries on the right, wired together via the submit button.
with gr.Blocks(theme=theme) as demo:
    # Using a refrigerator emoji for white goods π§ or π
    # NOTE(review): the emoji below appears mojibake-garbled in this file —
    # confirm the intended character before shipping.
    gr.Markdown("# π§ KIKERP: AI Assessment Tool")
    gr.Markdown("Upload a video of a device or white good for a complete visual and AI-driven analysis.")
    with gr.Row():
        with gr.Column(scale=1):
            # Input column: video source (upload or webcam) forced to mp4.
            video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
            # Optional free-text observations passed verbatim into the Gemini prompt.
            observations_input = gr.Textbox(
                label="Optional: User Observations",
                placeholder="e.g., Device overheats, screen flickers, makes a strange noise..."
            )
            submit_button = gr.Button("Run Full Analysis", variant="primary")
        with gr.Column(scale=2):
            # Output column, part 1: Gemini's structured condition assessment.
            gr.Markdown("## Gemini Condition Analysis")
            with gr.Row():
                device_type_output = gr.Textbox(label="Device Type")
                condition_output = gr.Textbox(label="Condition")
            reason_output = gr.Textbox(label="Reason / Details")
            gr.Markdown("---")  # Adds a horizontal line for separation
            # Output column, part 2: DINO heatmap/attention galleries.
            gr.Markdown("## DINO: Visual Feature Analysis")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Overlayed Heatmaps on Key Frames")
                    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery", columns=5)
                with gr.Column():
                    gr.Markdown("### Self-Attention Maps")
                    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery", columns=5)
    # Wire the button: full_analysis maps (video, observations) onto the
    # five output components in order.
    submit_button.click(
        fn=full_analysis,
        inputs=[video_input, observations_input],
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery
        ],
        show_progress='full'
    )

demo.launch(debug=True)