Spaces:
Sleeping
Sleeping
File size: 4,542 Bytes
afb9dad 2955ae6 a64527c afb9dad 5a474ac 17611a2 5a474ac a64527c 5a474ac afb9dad 4f37c46 5a474ac afb9dad 9c2cb20 afb9dad a64527c afb9dad a64527c 9c2cb20 a64527c 17611a2 9c2cb20 a64527c 17611a2 7135735 a64527c 7135735 9c2cb20 641b9cd 2955ae6 641b9cd 9c2cb20 a64527c ca5f4b2 a64527c ca5f4b2 9c2cb20 a64527c 9c2cb20 a64527c ca5f4b2 a64527c 2955ae6 afb9dad a64527c 2955ae6 a64527c 9c2cb20 a64527c 9c2cb20 afb9dad a64527c afb9dad a64527c 9c2cb20 afb9dad 9c2cb20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
import os
import json
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types
# Import our new DINO processing function
from dino_processor import process_video_with_dino
# --- Configuration and Client Initialization ---
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Only the environment lookup is guarded, so a KeyError raised for any other
# reason (e.g. inside the client constructor) is not misreported as a missing
# API key.
try:
    _gemini_api_key = os.environ["GEMINI_API_KEY"]
except KeyError:
    # `from None` drops the redundant chained KeyError traceback; the message
    # below already names the missing variable.
    # NOTE(review): gr.Error is kept for backward compatibility; at import time
    # there is no UI to show it in, so it only appears in the startup logs.
    raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.") from None

# Shared Gemini client used by the analysis function below.
client = genai.Client(api_key=_gemini_api_key)
# --- Main Gradio Function ---
# How often, and for how long at most, to poll Gemini while it processes the
# uploaded video. The deadline keeps a stuck upload from blocking a worker
# forever.
_GEMINI_POLL_INTERVAL_S = 5
_GEMINI_POLL_TIMEOUT_S = 600

# Instruction sent to Gemini together with the uploaded video.
_GEMINI_PROMPT = """
Analyze the provided video. Respond ONLY with a valid JSON object with three keys:
1. "device_type": A short string identifying the device.
2. "condition": A single word: "Mint", "Excellent", "Good", "Fair", or "Poor".
3. "reason": A brief string explaining the condition.
"""


def _wait_for_gemini_file(uploaded_file):
    """Poll Gemini until the uploaded file leaves PROCESSING and return it.

    Raises:
        TimeoutError: if the file is still PROCESSING after
            ``_GEMINI_POLL_TIMEOUT_S`` seconds.
        RuntimeError: if the file ends in any state other than ACTIVE.
    """
    deadline = time.monotonic() + _GEMINI_POLL_TIMEOUT_S
    while uploaded_file.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Gemini file processing did not finish within {_GEMINI_POLL_TIMEOUT_S} seconds."
            )
        time.sleep(_GEMINI_POLL_INTERVAL_S)
        uploaded_file = client.files.get(name=uploaded_file.name)
    if uploaded_file.state.name != "ACTIVE":
        raise RuntimeError(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
    return uploaded_file


def _run_gemini_analysis(video_file_path):
    """Upload the video to Gemini and return ``(device_type, condition, reason)``.

    Never raises: any failure is logged and reported through the first element
    of the returned tuple, with the other two left empty. The uploaded file is
    deleted afterwards on a best-effort basis.
    """
    print("--- Starting Gemini Analysis ---")
    uploaded_file = None
    try:
        uploaded_file = client.files.upload(file=video_file_path)
        # Use the refreshed handle once Gemini reports the file as ACTIVE.
        uploaded_file = _wait_for_gemini_file(uploaded_file)

        # NOTE(review): confirm this model alias is still served by the API.
        model_name = "gemini-1.5-flash-latest"
        config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
        response = client.models.generate_content(
            model=f"models/{model_name}",
            contents=[uploaded_file, _GEMINI_PROMPT],
            config=config,
        )

        parsed_json = json.loads(response.text)
        if not isinstance(parsed_json, dict):
            # The prompt asks for a JSON object; anything else (list, string,
            # number) has no keys to read.
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(parsed_json).__name__}."
            )
        result = (
            parsed_json.get("device_type", "N/A"),
            parsed_json.get("condition", "N/A"),
            parsed_json.get("reason", "N/A"),
        )
        print("--- Gemini Analysis Complete ---")
    except Exception as e:
        # Top-level boundary for the Gemini step: report the failure in the UI
        # instead of crashing, so the DINO results can still be shown.
        print(f"ERROR during Gemini processing: {e}")
        result = (f"Error in Gemini processing: {e}", "", "")
    finally:
        if uploaded_file is not None:
            # Cleanup is best-effort: a failed delete must not discard the
            # analysis results that were already computed.
            try:
                client.files.delete(name=uploaded_file.name)
            except Exception as cleanup_error:
                print(f"WARNING: could not delete uploaded Gemini file: {cleanup_error}")
    return result


def full_analysis(video_file_path):
    """Run DINO and Gemini analysis on a video and return values for the UI.

    Args:
        video_file_path: Path of the uploaded video; falsy when nothing was
            uploaded.

    Returns:
        A 5-tuple ``(device_type, condition, reason, overlay_images,
        attention_maps)``. When no video is given or DINO processing fails,
        the first element carries the message, the next two are empty strings
        and both image lists are ``None``. When only the Gemini step fails,
        the first element carries the error and the DINO images are still
        returned.
    """
    if not video_file_path:
        # Return empty values for all components.
        return "Please upload a video first.", "", "", None, None

    # --- DINO Analysis ---
    print("--- Starting DINO Analysis ---")
    try:
        # Expected to yield (overlay, attention_map) pairs, one per frame.
        dino_results = process_video_with_dino(video_file_path)
        overlay_images = [res[0] for res in dino_results]
        attention_maps = [res[1] for res in dino_results]
        print("--- DINO Analysis Complete ---")
    except Exception as e:
        print(f"ERROR during DINO processing: {e}")
        # Return an error message and empty galleries; Gemini is skipped.
        return f"Error in DINO processing: {e}", "", "", None, None

    # --- Gemini Analysis --- (DINO has already finished at this point)
    device_type, condition, reason = _run_gemini_analysis(video_file_path)

    # Return all the results to the Gradio UI.
    return device_type, condition, reason, overlay_images, attention_maps
# --- Gradio Interface ---
# Layout: video input and submit button, then Gemini's three text results,
# then two galleries of DINO images.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📱 Advanced Device Condition Analyzer")
    gr.Markdown("Upload a video to get a condition analysis from Gemini and attention maps from DINO.")

    video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
    submit_button = gr.Button("Run Full Analysis", variant="primary")

    gr.Markdown("## Gemini Condition Analysis")
    # NOTE(review): the original indentation is not visible here; the three
    # text boxes are assumed to share one row. Confirm against the live layout.
    with gr.Row():
        device_type_output = gr.Textbox(label="Device Type")
        condition_output = gr.Textbox(label="Condition")
        reason_output = gr.Textbox(label="Reason / Details")

    gr.Markdown("## DINO: Overlayed Heatmaps on Representative Frames")
    # Use gr.Gallery to display multiple images. Each gallery gets its own
    # elem_id: the value becomes the component's DOM id, which must be unique.
    overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="overlay_gallery")

    gr.Markdown("## DINO: Self-Attention Maps")
    attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="attention_gallery")

    # The output order must match the 5-tuple returned by full_analysis.
    submit_button.click(
        fn=full_analysis,
        inputs=video_input,
        outputs=[
            device_type_output,
            condition_output,
            reason_output,
            overlay_gallery,
            attention_gallery,
        ],
        show_progress='full',
    )

# Start the server only when run as a script, so importing this module
# (e.g. from a test or another tool) does not launch the app.
if __name__ == "__main__":
    demo.launch(debug=True)