Spaces:
Sleeping
Sleeping
File size: 6,886 Bytes
afb9dad 2955ae6 a64527c afb9dad 5a474ac 17611a2 5a474ac e69763d a64527c 5a474ac afb9dad 4f37c46 5a474ac afb9dad 9c2cb20 afb9dad a64527c e69763d afb9dad a64527c e69763d a64527c 9c2cb20 a64527c 17611a2 9c2cb20 17611a2 7135735 a64527c 7135735 e69763d 9c2cb20 e69763d a64527c ca5f4b2 a64527c ca5f4b2 e69763d 9c2cb20 e69763d a64527c 9c2cb20 a64527c ca5f4b2 a64527c 2955ae6 e69763d a64527c 9c2cb20 e69763d a64527c e69763d a64527c e69763d 9c2cb20 afb9dad a64527c e69763d a64527c 9c2cb20 afb9dad 9c2cb20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import gradio as gr
import os
import json
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types
# Import our DINO processing function
from dino_processor import process_video_with_dino
# --- Configuration and Client Initialization ---
load_dotenv()

# Fail fast with a visible Gradio error if the API key secret is missing.
_api_key = os.environ.get("GEMINI_API_KEY")
if _api_key is None:
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
client = genai.Client(api_key=_api_key)
# --- Main Gradio Function ---

# Seconds between polls of the Gemini Files API while the upload is processed.
_GEMINI_POLL_INTERVAL_S = 5
# Give up waiting for Gemini file processing after this long instead of
# hanging the request forever (the original loop had no timeout).
_GEMINI_POLL_TIMEOUT_S = 300


def _run_dino(video_file_path):
    """Run the DINO pass and split its per-frame results.

    Returns (overlay_images, attention_maps), two parallel lists built from
    the (overlay, attention) pairs produced by process_video_with_dino.
    Exceptions propagate to the caller.
    """
    dino_results = process_video_with_dino(video_file_path)
    overlay_images = [overlay for overlay, _ in dino_results]
    attention_maps = [attention for _, attention in dino_results]
    return overlay_images, attention_maps


def _build_prompt(user_observations):
    """Return the JSON-only assessment prompt, embedding optional user notes."""
    return f"""
Analyze the provided video and the user's observations. Respond ONLY with a valid JSON object.
User Observations: "{user_observations if user_observations else 'None'}"
Follow these steps:
1. First, identify the primary object in the video. Is it an electronic device or a white good (e.g., refrigerator, washing machine, microwave)?
2. If it IS an electronic device or white good, respond with a JSON object with these three keys:
- "device_type": A short string identifying the specific device (e.g., "iPhone 13 Pro", "Samsung Washing Machine").
- "condition": A single word describing its visual condition. Choose ONLY from: "unusable", "poor", "functional", "good", "excellent".
- "reason": A very brief, concise string explaining the condition (e.g., "heavy screen cracks", "minor scratches on body", "clean with no visible marks").
3. If it is NOT an electronic device or white good, respond with a JSON object with only ONE key:
- "device_type": A string starting with "Invalid: " followed by the object's category (e.g., "Invalid: Water Bottle", "Invalid: Shoe").
Example for a valid device:
{{
"device_type": "Dell XPS Laptop",
"condition": "functional",
"reason": "Visible scratches and paint erosion on the lid."
}}
Example for an invalid object:
{{
"device_type": "Invalid: Coffee Mug"
}}
"""


def _wait_until_active(uploaded_file):
    """Poll the Files API until the uploaded file leaves PROCESSING.

    Returns the refreshed file object once ACTIVE. Raises if processing
    fails, or if it does not finish within _GEMINI_POLL_TIMEOUT_S seconds.
    """
    deadline = time.monotonic() + _GEMINI_POLL_TIMEOUT_S
    while uploaded_file.state.name == "PROCESSING":
        if time.monotonic() > deadline:
            raise Exception("Timed out waiting for Gemini to process the uploaded video.")
        time.sleep(_GEMINI_POLL_INTERVAL_S)
        uploaded_file = client.files.get(name=uploaded_file.name)
    if uploaded_file.state.name != "ACTIVE":
        raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
    return uploaded_file


def _interpret_assessment(parsed_json):
    """Map the model's JSON object to (device_type, condition, reason) strings.

    An "Invalid: ..." device_type means the object was not an electronic
    device or white good, so the condition fields do not apply.
    """
    device_type = parsed_json.get("device_type", "N/A")
    if "Invalid:" in device_type:
        return device_type, "N/A", "Object is not a valid electronic device or white good."
    return device_type, parsed_json.get("condition", "N/A"), parsed_json.get("reason", "N/A")


def _run_gemini(video_file_path, user_observations):
    """Upload the video to Gemini and request a JSON condition assessment.

    Returns (device_type, condition, reason). On any failure the first
    element carries the error message and the other two are empty strings.
    The uploaded file is always deleted from the Files API afterwards
    (best-effort: a failed delete never masks the analysis result).
    """
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        uploaded_file = _wait_until_active(uploaded_file)
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, _build_prompt(user_observations)]
        response = client.models.generate_content(
            model="models/gemini-1.5-flash-latest", contents=contents, config=config
        )
        parsed_json = json.loads(response.text)
        device_type, condition, reason = _interpret_assessment(parsed_json)
        print("--- Gemini Analysis Complete ---")
        return device_type, condition, reason
    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        return f"Error in Gemini processing: {e}", "", ""
    finally:
        if uploaded_file:
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                # Deleting a PROCESSING/failed file can itself raise; log and move on.
                print(f"WARNING: could not delete uploaded Gemini file: {cleanup_error}")


def full_analysis(video_file_path, user_observations):
    """Run the full assessment pipeline for the Gradio UI.

    The DINO visual pass runs first, then the Gemini condition assessment
    (sequentially — not in parallel, despite what an earlier comment said).

    Args:
        video_file_path: Local path of the uploaded/recorded video, or None.
        user_observations: Optional free-text notes from the user.

    Returns:
        A 5-tuple matching the output components:
        (device_type, condition, reason, overlay_images, attention_maps).
    """
    if not video_file_path:
        # Return empty values for all output components.
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        overlay_images, attention_maps = _run_dino(video_file_path)
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    device_type, condition, reason = _run_gemini(video_file_path, user_observations)

    # Return all the results to the Gradio UI.
    return device_type, condition, reason, overlay_images, attention_maps
# --- Gradio Interface ---
# Define a custom theme
# Green-tinted Soft theme: white content cards on a pale aquatic background,
# with solid primary-colored buttons.
theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="emerald",
).set(
    body_background_fill="#E0F2F1",  # A light aquatic green
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# UI layout: left column takes the inputs, right column shows the Gemini
# text results above the two DINO image galleries.
with gr.Blocks(theme=theme) as demo:
    # Using a refrigerator emoji for white goods π§ or π
    gr.Markdown("# π§ KIKERP: AI Assessment Tool")
    gr.Markdown("Upload a video of a device or white good for a complete visual and AI-driven analysis.")
    with gr.Row():
        with gr.Column(scale=1):
            # Video may be uploaded as a file or recorded from the webcam.
            video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
            # NEW: Optional text input for user observations
            observations_input = gr.Textbox(
                label="Optional: User Observations",
                placeholder="e.g., Device overheats, screen flickers, makes a strange noise..."
            )
            submit_button = gr.Button("Run Full Analysis", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("## Gemini Condition Analysis")
            with gr.Row():
                device_type_output = gr.Textbox(label="Device Type")
                condition_output = gr.Textbox(label="Condition")
                reason_output = gr.Textbox(label="Reason / Details")
            gr.Markdown("---")  # Adds a horizontal line for separation
            gr.Markdown("## DINO: Visual Feature Analysis")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Overlayed Heatmaps on Key Frames")
                    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery", columns=5)
                with gr.Column():
                    gr.Markdown("### Self-Attention Maps")
                    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery", columns=5)
    # Wire the button to full_analysis; output order must match its 5-tuple.
    submit_button.click(
        fn=full_analysis,
        # Add the new observations_input to the inputs list
        inputs=[video_input, observations_input],
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery
        ],
        show_progress='full'
    )

demo.launch(debug=True)