# KIKERP_Demo / app.py
import gradio as gr
import os
import json
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types
# Import our DINO processing function
from dino_processor import process_video_with_dino
# --- Configuration and Client Initialization ---
# Load environment variables from a local .env file when present (no-op on
# a deployed Space, where secrets arrive via the environment directly).
load_dotenv()
try:
    # Module-level Gemini client shared by all requests.
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
except KeyError:
    # Fail fast with a user-visible Gradio error instead of a bare KeyError.
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
# --- Main Gradio Function ---
# --- Main Gradio Function ---
def full_analysis(video_file_path, user_observations):
    """Run the complete analysis pipeline on an uploaded video.

    First extracts DINO attention/overlay frames locally, then uploads the
    video to the Gemini Files API and asks the model for a JSON condition
    assessment.

    Args:
        video_file_path: Local path to the uploaded/recorded video, or None.
        user_observations: Optional free-text notes from the user; folded
            into the Gemini prompt verbatim.

    Returns:
        A 5-tuple ``(device_type, condition, reason, overlay_images,
        attention_maps)`` matching the five Gradio output components.
        On DINO failure the galleries are ``None``; on Gemini failure the
        text fields carry the error while the galleries still show results.
    """
    if not video_file_path:
        # Return empty values for all five output components.
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        dino_results = process_video_with_dino(video_file_path)
        # Each result is a (overlay_frame, attention_map) pair.
        overlay_images = [res[0] for res in dino_results]
        attention_maps = [res[1] for res in dino_results]
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis ---
    print("--- Starting Gemini Analysis ---")
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        # Poll until the Files API finishes server-side processing.
        # FIX: bound the wait — previously a file stuck in PROCESSING would
        # hang this request forever.
        deadline = time.time() + 300  # 5-minute ceiling on processing
        while uploaded_file.state.name == "PROCESSING":
            if time.time() > deadline:
                raise Exception("Timed out waiting for Gemini to process the video.")
            time.sleep(5)
            uploaded_file = client.files.get(name=uploaded_file.name)
        if uploaded_file.state.name != "ACTIVE":
            raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")

        # --- NEW, MORE ADVANCED PROMPT ---
        prompt = f"""
Analyze the provided video and the user's observations. Respond ONLY with a valid JSON object.
User Observations: "{user_observations if user_observations else 'None'}"
Follow these steps:
1. First, identify the primary object in the video. Is it an electronic device or a white good (e.g., refrigerator, washing machine, microwave)?
2. If it IS an electronic device or white good, respond with a JSON object with these three keys:
- "device_type": A short string identifying the specific device (e.g., "iPhone 13 Pro", "Samsung Washing Machine").
- "condition": A single word describing its visual condition. Choose ONLY from: "unusable", "poor", "functional", "good", "excellent".
- "reason": A very brief, concise string explaining the condition (e.g., "heavy screen cracks", "minor scratches on body", "clean with no visible marks").
3. If it is NOT an electronic device or white good, respond with a JSON object with only ONE key:
- "device_type": A string starting with "Invalid: " followed by the object's category (e.g., "Invalid: Water Bottle", "Invalid: Shoe").
Example for a valid device:
{{
"device_type": "Dell XPS Laptop",
"condition": "functional",
"reason": "Visible scratches and paint erosion on the lid."
}}
Example for an invalid object:
{{
"device_type": "Invalid: Coffee Mug"
}}
"""
        model_name = "gemini-1.5-flash-latest"
        # Low temperature + JSON mime type pushes the model toward the
        # strict JSON contract defined in the prompt.
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        contents = [uploaded_file, prompt]
        response = client.models.generate_content(model=f"models/{model_name}", contents=contents, config=config)
        parsed_json = json.loads(response.text)

        device_type = parsed_json.get("device_type", "N/A")
        # FIX: the prompt contract says invalid objects START with
        # "Invalid: " — match the prefix rather than any substring, so a
        # device name merely containing the word cannot be misclassified.
        if device_type.startswith("Invalid:"):
            condition = "N/A"
            reason = "Object is not a valid electronic device or white good."
        else:
            condition = parsed_json.get("condition", "N/A")
            reason = parsed_json.get("reason", "N/A")
        print("--- Gemini Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during Gemini processing: {e}")
        device_type, condition, reason = f"Error in Gemini processing: {e}", "", ""
    finally:
        # FIX: guard cleanup — delete can itself fail (e.g. the upload never
        # fully registered), and an exception raised in ``finally`` would
        # mask the real error and crash the handler.
        if uploaded_file:
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_err:
                print(f"WARNING: could not delete uploaded file: {cleanup_err}")

    # Return all the results to the Gradio UI.
    return device_type, condition, reason, overlay_images, attention_maps
# --- Gradio Interface ---
# Define a custom theme: green/emerald palette on a light aquatic background,
# white cards with a soft drop shadow.
theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue="emerald",
).set(
    body_background_fill="#E0F2F1",  # A light aquatic green
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",  # themed primary button
    button_primary_text_color="white",
)
# UI layout: inputs (video + optional notes) in a narrow left column,
# Gemini text results and the two DINO galleries in a wider right column.
with gr.Blocks(theme=theme) as demo:
    # Using a refrigerator emoji for white goods 🧊 or 🔌
    gr.Markdown("# 🧊 KIKERP: AI Assessment Tool")
    gr.Markdown("Upload a video of a device or white good for a complete visual and AI-driven analysis.")
    with gr.Row():
        with gr.Column(scale=1):
            video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
            # NEW: Optional text input for user observations
            observations_input = gr.Textbox(
                label="Optional: User Observations",
                placeholder="e.g., Device overheats, screen flickers, makes a strange noise..."
            )
            submit_button = gr.Button("Run Full Analysis", variant="primary")
        with gr.Column(scale=2):
            gr.Markdown("## Gemini Condition Analysis")
            with gr.Row():
                device_type_output = gr.Textbox(label="Device Type")
                condition_output = gr.Textbox(label="Condition")
                reason_output = gr.Textbox(label="Reason / Details")
            gr.Markdown("---")  # Adds a horizontal line for separation
            gr.Markdown("## DINO: Visual Feature Analysis")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Overlayed Heatmaps on Key Frames")
                    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery", columns=5)
                with gr.Column():
                    gr.Markdown("### Self-Attention Maps")
                    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery", columns=5)
    # Wire the button to full_analysis; the outputs list maps 1:1 onto the
    # five values the function returns.
    submit_button.click(
        fn=full_analysis,
        # Add the new observations_input to the inputs list
        inputs=[video_input, observations_input],
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery
        ],
        show_progress='full'
    )

demo.launch(debug=True)