vivek9chavan commited on
Commit
e69763d
·
verified ·
1 Parent(s): a75f067

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -30
app.py CHANGED
@@ -7,7 +7,7 @@ from dotenv import load_dotenv
7
  from google import genai
8
  from google.genai import types
9
 
10
- # Import our new DINO processing function
11
  from dino_processor import process_video_with_dino
12
 
13
  # --- Configuration and Client Initialization ---
@@ -18,29 +18,26 @@ except KeyError:
18
  raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
19
 
20
  # --- Main Gradio Function ---
21
- def full_analysis(video_file_path):
22
  if not video_file_path:
23
  # Return empty values for all components
24
  return "Please upload a video first.", "", "", None, None
25
 
26
- # --- DINO Analysis ---
27
  print("--- Starting DINO Analysis ---")
28
  try:
29
- # This function will return a list of tuples: [(overlay1, attn1), (overlay2, attn2), ...]
30
  dino_results = process_video_with_dino(video_file_path)
31
  overlay_images = [res[0] for res in dino_results]
32
  attention_maps = [res[1] for res in dino_results]
33
  print("--- DINO Analysis Complete ---")
34
  except Exception as e:
35
  print(f"ERROR during DINO processing: {e}")
36
- # Return an error message and empty galleries
37
  return f"Error in DINO processing: {e}", "", "", None, None
38
 
39
  # --- Gemini Analysis ---
40
  print("--- Starting Gemini Analysis ---")
41
  uploaded_file = None
42
  try:
43
- # Wait for DINO to finish before starting the Gemini upload
44
  uploaded_file = client.files.upload(file=video_file_path)
45
  while uploaded_file.state.name == "PROCESSING":
46
  time.sleep(5)
@@ -48,21 +45,50 @@ def full_analysis(video_file_path):
48
  if uploaded_file.state.name != "ACTIVE":
49
  raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
50
 
51
- prompt = """
52
- Analyze the provided video. Respond ONLY with a valid JSON object with three keys:
53
- 1. "device_type": A short string identifying the device.
54
- 2. "condition": A single word: "Mint", "Excellent", "Good", "Fair", or "Poor".
55
- 3. "reason": A brief string explaining the condition.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  """
57
- model_name = "gemini-1.5-flash-latest" # Using a reliable public model
 
58
  config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
59
  contents = [uploaded_file, prompt]
60
 
61
  response = client.models.generate_content(model=f"models/{model_name}", contents=contents, config=config)
62
  parsed_json = json.loads(response.text)
 
 
63
  device_type = parsed_json.get("device_type", "N/A")
64
- condition = parsed_json.get("condition", "N/A")
65
- reason = parsed_json.get("reason", "N/A")
 
 
 
 
 
66
  print("--- Gemini Analysis Complete ---")
67
 
68
  except Exception as e:
@@ -76,29 +102,56 @@ def full_analysis(video_file_path):
76
  return device_type, condition, reason, overlay_images, attention_maps
77
 
78
  # --- Gradio Interface ---
79
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
80
- gr.Markdown("# 📱 Advanced Device Condition Analyzer")
81
- gr.Markdown("Upload a video to get a condition analysis from Gemini and attention maps from DINO.")
82
-
83
- video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
84
- submit_button = gr.Button("Run Full Analysis", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- gr.Markdown("## Gemini Condition Analysis")
87
  with gr.Row():
88
- device_type_output = gr.Textbox(label="Device Type")
89
- condition_output = gr.Textbox(label="Condition")
90
- reason_output = gr.Textbox(label="Reason / Details")
 
 
 
 
 
91
 
92
- gr.Markdown("## DINO: Overlayed Heatmaps on Representative Frames")
93
- # Use gr.Gallery to display multiple images
94
- overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery")
 
 
 
95
 
96
- gr.Markdown("## DINO: Self-Attention Maps")
97
- attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery")
 
 
 
 
 
 
 
 
98
 
99
  submit_button.click(
100
  fn=full_analysis,
101
- inputs=video_input,
 
102
  outputs=[
103
  device_type_output,
104
  condition_output,
 
7
  from google import genai
8
  from google.genai import types
9
 
10
+ # Import our DINO processing function
11
  from dino_processor import process_video_with_dino
12
 
13
  # --- Configuration and Client Initialization ---
 
18
  raise gr.Error("FATAL: GEMINI_API_KEY not found. Please set it in your Hugging Face Space secrets.")
19
 
20
  # --- Main Gradio Function ---
21
+ def full_analysis(video_file_path, user_observations):
22
  if not video_file_path:
23
  # Return empty values for all components
24
  return "Please upload a video first.", "", "", None, None
25
 
26
+ # --- DINO Analysis (runs in parallel with Gemini upload) ---
27
  print("--- Starting DINO Analysis ---")
28
  try:
 
29
  dino_results = process_video_with_dino(video_file_path)
30
  overlay_images = [res[0] for res in dino_results]
31
  attention_maps = [res[1] for res in dino_results]
32
  print("--- DINO Analysis Complete ---")
33
  except Exception as e:
34
  print(f"ERROR during DINO processing: {e}")
 
35
  return f"Error in DINO processing: {e}", "", "", None, None
36
 
37
  # --- Gemini Analysis ---
38
  print("--- Starting Gemini Analysis ---")
39
  uploaded_file = None
40
  try:
 
41
  uploaded_file = client.files.upload(file=video_file_path)
42
  while uploaded_file.state.name == "PROCESSING":
43
  time.sleep(5)
 
45
  if uploaded_file.state.name != "ACTIVE":
46
  raise Exception(f"File processing failed for Gemini. State: {uploaded_file.state.name}")
47
 
48
+ # --- NEW, MORE ADVANCED PROMPT ---
49
+ prompt = f"""
50
+ Analyze the provided video and the user's observations. Respond ONLY with a valid JSON object.
51
+
52
+ User Observations: "{user_observations if user_observations else 'None'}"
53
+
54
+ Follow these steps:
55
+ 1. First, identify the primary object in the video. Is it an electronic device or a white good (e.g., refrigerator, washing machine, microwave)?
56
+ 2. If it IS an electronic device or white good, respond with a JSON object with these three keys:
57
+ - "device_type": A short string identifying the specific device (e.g., "iPhone 13 Pro", "Samsung Washing Machine").
58
+ - "condition": A single word describing its visual condition. Choose ONLY from: "unusable", "poor", "functional", "good", "excellent".
59
+ - "reason": A very brief, concise string explaining the condition (e.g., "heavy screen cracks", "minor scratches on body", "clean with no visible marks").
60
+ 3. If it is NOT an electronic device or white good, respond with a JSON object with only ONE key:
61
+ - "device_type": A string starting with "Invalid: " followed by the object's category (e.g., "Invalid: Water Bottle", "Invalid: Shoe").
62
+
63
+ Example for a valid device:
64
+ {{
65
+ "device_type": "Dell XPS Laptop",
66
+ "condition": "functional",
67
+ "reason": "Visible scratches and paint erosion on the lid."
68
+ }}
69
+
70
+ Example for an invalid object:
71
+ {{
72
+ "device_type": "Invalid: Coffee Mug"
73
+ }}
74
  """
75
+
76
+ model_name = "gemini-1.5-flash-latest"
77
  config = types.GenerateContentConfig(temperature=0.2, response_mime_type="application/json")
78
  contents = [uploaded_file, prompt]
79
 
80
  response = client.models.generate_content(model=f"models/{model_name}", contents=contents, config=config)
81
  parsed_json = json.loads(response.text)
82
+
83
+ # Check if the device was valid or not
84
  device_type = parsed_json.get("device_type", "N/A")
85
+ if "Invalid:" in device_type:
86
+ condition = "N/A"
87
+ reason = "Object is not a valid electronic device or white good."
88
+ else:
89
+ condition = parsed_json.get("condition", "N/A")
90
+ reason = parsed_json.get("reason", "N/A")
91
+
92
  print("--- Gemini Analysis Complete ---")
93
 
94
  except Exception as e:
 
102
  return device_type, condition, reason, overlay_images, attention_maps
103
 
104
  # --- Gradio Interface ---
105
+ # Define a custom theme
106
+ theme = gr.themes.Soft(
107
+ primary_hue="green",
108
+ secondary_hue="emerald",
109
+ ).set(
110
+ body_background_fill="#E0F2F1", # A light aquatic green
111
+ block_background_fill="white",
112
+ block_border_width="1px",
113
+ block_shadow="*shadow_drop_lg",
114
+ button_primary_background_fill="*primary_500",
115
+ button_primary_text_color="white",
116
+ )
117
+
118
+ with gr.Blocks(theme=theme) as demo:
119
+ # Using a refrigerator emoji for white goods 🧊 or 🔌
120
+ gr.Markdown("# 🧊 KIKERP: AI Assessment Tool")
121
+ gr.Markdown("Upload a video of a device or white good for a complete visual and AI-driven analysis.")
122
 
 
123
  with gr.Row():
124
+ with gr.Column(scale=1):
125
+ video_input = gr.Video(label="Upload or Record Video", sources=["upload", "webcam"], format="mp4")
126
+ # NEW: Optional text input for user observations
127
+ observations_input = gr.Textbox(
128
+ label="Optional: User Observations",
129
+ placeholder="e.g., Device overheats, screen flickers, makes a strange noise..."
130
+ )
131
+ submit_button = gr.Button("Run Full Analysis", variant="primary")
132
 
133
+ with gr.Column(scale=2):
134
+ gr.Markdown("## Gemini Condition Analysis")
135
+ with gr.Row():
136
+ device_type_output = gr.Textbox(label="Device Type")
137
+ condition_output = gr.Textbox(label="Condition")
138
+ reason_output = gr.Textbox(label="Reason / Details")
139
 
140
+ gr.Markdown("---") # Adds a horizontal line for separation
141
+
142
+ gr.Markdown("## DINO: Visual Feature Analysis")
143
+ with gr.Row():
144
+ with gr.Column():
145
+ gr.Markdown("### Overlayed Heatmaps on Key Frames")
146
+ overlay_gallery = gr.Gallery(label="Overlayed Heatmaps", elem_id="gallery", columns=5)
147
+ with gr.Column():
148
+ gr.Markdown("### Self-Attention Maps")
149
+ attention_gallery = gr.Gallery(label="Self-Attention Maps", elem_id="gallery", columns=5)
150
 
151
  submit_button.click(
152
  fn=full_analysis,
153
+ # Add the new observations_input to the inputs list
154
+ inputs=[video_input, observations_input],
155
  outputs=[
156
  device_type_output,
157
  condition_output,