Gaurav vashistha committed on
Commit
bcc921a
·
1 Parent(s): 1f88d4a

Fix Visual Analyst: Pivot to Gemini Vision and update dependencies

Browse files
Files changed (2) hide show
  1. agents/visual_analyst.py +33 -56
  2. requirements.txt +1 -0
agents/visual_analyst.py CHANGED
@@ -1,7 +1,6 @@
1
- import google.generativeai as genai
2
  import os
3
  import json
4
- import asyncio
5
  from dotenv import load_dotenv
6
 
7
  load_dotenv()
@@ -13,70 +12,48 @@ class VisualAnalyst:
13
  raise ValueError("GEMINI_API_KEY not found")
14
 
15
  genai.configure(api_key=self.api_key)
16
-
17
- print("🔍 Checking available Gemini models...")
 
 
 
18
  try:
19
- my_models = [m.name for m in genai.list_models() if 'generateContent' in m.supported_generation_methods]
20
- print(f"📋 Available Models: {my_models}")
 
 
 
 
 
 
 
 
21
 
22
- # UPDATED PRIORITY: GEMINI 2.0 FIRST
23
- preferred_order = [
24
- 'models/gemini-2.0-flash-exp', # <--- Newest & Smartest (Available in logs)
25
- 'models/gemini-1.5-pro',
26
- 'models/gemini-1.5-pro-001',
27
- 'models/gemini-1.5-flash',
28
- 'models/gemini-1.5-flash-001',
29
- 'models/gemini-pro-vision'
30
- ]
31
 
32
- selected_model = "models/gemini-2.0-flash-exp" # Default to the new one
 
 
33
 
34
- for model_name in preferred_order:
35
- if model_name in my_models:
36
- selected_model = model_name
37
- break
38
 
39
- print(f"✅ Selected Vision Model: {selected_model}")
40
- self.model = genai.GenerativeModel(selected_model)
 
 
 
 
41
 
42
- except Exception as e:
43
- print(f"⚠️ Model list failed ({e}), defaulting to gemini-2.0-flash-exp")
44
- self.model = genai.GenerativeModel('models/gemini-2.0-flash-exp')
45
-
46
- async def analyze_image(self, image_path: str):
47
- # Adaptation: Read file path to bytes, as main.py passes a path
48
- try:
49
- with open(image_path, "rb") as f:
50
- image_bytes = f.read()
51
- except Exception as e:
52
- print(f"❌ File Read Error: {e}")
53
- return {
54
- "main_color": "Unknown",
55
- "visual_features": [f"Error reading file: {str(e)}"]
56
- }
57
 
58
- prompt = (
59
- "Analyze this product image for an e-commerce listing. "
60
- "Return a JSON object with keys: main_color, product_type, design_style, visual_features."
61
- )
62
- try:
63
- # Adaptation: Run in thread to allow async await
64
- response = await asyncio.to_thread(
65
- self.model.generate_content,
66
- [
67
- {'mime_type': 'image/jpeg', 'data': image_bytes},
68
- prompt
69
- ]
70
- )
71
-
72
- text = response.text
73
- if text.startswith('```json'): text = text[7:]
74
- if text.endswith('```'): text = text[:-3]
75
-
76
- return json.loads(text.strip())
77
  except Exception as e:
78
  print(f"❌ Analysis Failed: {e}")
79
  return {
80
  "main_color": "Unknown",
 
 
81
  "visual_features": [f"Error: {str(e)}"]
82
  }
 
 
1
  import os
2
  import json
3
+ import google.generativeai as genai
4
  from dotenv import load_dotenv
5
 
6
  load_dotenv()
 
12
  raise ValueError("GEMINI_API_KEY not found")
13
 
14
  genai.configure(api_key=self.api_key)
15
+ self.model_name = "models/gemini-flash-latest"
16
+ self.model = genai.GenerativeModel(self.model_name)
17
+ print(f"✅ VisualAnalyst stored Gemini model: {self.model_name}")
18
+
19
+ def analyze_image(self, image_path: str):
20
  try:
21
+ # Upload the file to Gemini
22
+ # Note: For efficiency in production, files should be managed (uploads/deletes)
23
+ # but for this agentic flow, we'll upload per request or assume local path usage helper if needed.
24
+ # However, the standard `model.generate_content` can take PIL images or file objects directly for some sdk versions,
25
+ # but using the File API is cleaner for 1.5 Flash multi-modal.
26
+ # Let's use the simpler PIL integration if available, or just path if the SDK supports it.
27
+ # actually, standard genai usage for images usually involves PIL or uploading.
28
+ # Let's try the PIL approach first as it's often more direct for local scripts.
29
+ import PIL.Image
30
+ img = PIL.Image.open(image_path)
31
 
32
+ user_prompt = (
33
+ "Analyze this product image. "
34
+ "Return ONLY valid JSON with keys: main_color, product_type, design_style, visual_features."
35
+ )
 
 
 
 
 
36
 
37
+ # Gemini 1.5 Flash supports JSON response schema, but simple prompting often works well too.
38
+ # We'll stick to prompt engineering for now to match the "Return ONLY valid JSON" instruction.
39
+ response = self.model.generate_content([user_prompt, img])
40
 
41
+ response_text = response.text
 
 
 
42
 
43
+ # Clean up potential markdown code fences
44
+ cleaned_content = response_text
45
+ if "```json" in cleaned_content:
46
+ cleaned_content = cleaned_content.replace("```json", "").replace("```", "")
47
+ elif "```" in cleaned_content:
48
+ cleaned_content = cleaned_content.replace("```", "")
49
 
50
+ return json.loads(cleaned_content.strip())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  except Exception as e:
53
  print(f"❌ Analysis Failed: {e}")
54
  return {
55
  "main_color": "Unknown",
56
+ "product_type": "Unknown",
57
+ "design_style": "Unknown",
58
  "visual_features": [f"Error: {str(e)}"]
59
  }
requirements.txt CHANGED
@@ -11,3 +11,4 @@ python-dotenv
11
  google-generativeai>=0.8.3
12
  groq
13
  Pillow
 
 
11
  google-generativeai>=0.8.3
12
  groq
13
  Pillow
14
+ huggingface_hub