File size: 2,694 Bytes
69c20e8
 
bcc921a
69c20e8
 
 
 
 
 
3a6f5ad
 
9d3f2ae
1f88d4a
3a6f5ad
bcc921a
 
 
 
c6137f0
1900bac
bcc921a
 
 
 
 
 
 
 
 
 
1900bac
bcc921a
 
 
 
1900bac
bcc921a
 
 
1900bac
bcc921a
1900bac
bcc921a
 
 
 
 
 
1900bac
bcc921a
3a6f5ad
69c20e8
1900bac
69c20e8
b8195df
bcc921a
 
3a6f5ad
69c20e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import asyncio
import json
import os

import google.generativeai as genai
from dotenv import load_dotenv

# Runs at import time so that environment lookups made later in this module
# (e.g. os.getenv("GEMINI_API_KEY") in VisualAnalyst.__init__) can be satisfied
# from a local .env file as well as from the real process environment.
load_dotenv()

class VisualAnalyst:
    """Describe a product image as structured attributes using a Gemini model.

    The model is prompted to answer with a JSON object containing the keys
    ``main_color``, ``product_type``, ``design_style`` and ``visual_features``.
    """

    def __init__(self):
        """Configure the Gemini SDK and create the model handle.

        Raises:
            ValueError: If the ``GEMINI_API_KEY`` environment variable is
                unset or empty.
        """
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found")

        genai.configure(api_key=self.api_key)
        self.model_name = "models/gemini-flash-latest"
        self.model = genai.GenerativeModel(self.model_name)
        print(f"✅ VisualAnalyst stored Gemini model: {self.model_name}")

    async def analyze_image(self, image_path: str) -> dict:
        """Analyze the image at *image_path* and return its attributes.

        This method is deliberately best-effort: it never raises. Any failure
        (unreadable image, SDK/network error, reply that is not a JSON object)
        is printed and reported through a fallback dict.

        Args:
            image_path: Path to a local image file readable by Pillow.

        Returns:
            The JSON object produced by the model on success. On failure, a
            dict whose ``main_color``, ``product_type`` and ``design_style``
            are ``"Unknown"`` and whose ``visual_features`` is a one-element
            list holding ``"Error: <message>"``.
        """
        try:
            # Opening the image and calling the SDK are both blocking; running
            # them in a worker thread keeps the event loop free for other
            # tasks while the request is in flight.
            response_text = await asyncio.to_thread(
                self._request_analysis, image_path
            )
            return self._parse_json_object(response_text)
        except Exception as e:
            print(f"❌ Analysis Failed: {e}")
            return {
                "main_color": "Unknown",
                "product_type": "Unknown",
                "design_style": "Unknown",
                "visual_features": [f"Error: {str(e)}"],
            }

    def _request_analysis(self, image_path: str) -> str:
        """Send the image and prompt to the model (blocking); return its text."""
        # Imported lazily so Pillow is only required when an image is analyzed.
        import PIL.Image

        user_prompt = (
            "Analyze this product image. "
            "Return ONLY valid JSON with keys: main_color, product_type, design_style, visual_features."
        )

        # The context manager closes the underlying file handle once the
        # request has been sent, instead of leaking it until garbage collection.
        with PIL.Image.open(image_path) as img:
            response = self.model.generate_content([user_prompt, img])
        return response.text

    @staticmethod
    def _parse_json_object(response_text: str) -> dict:
        """Extract the JSON object from a model reply.

        Tries the reply as-is first. If that fails (markdown code fences or
        explanatory prose around the payload), parses the span from the first
        ``{`` to the last ``}`` instead.

        Raises:
            ValueError: If no JSON can be decoded (``json.JSONDecodeError`` is
                a ``ValueError`` subclass) or the decoded value is not an
                object.
        """
        text = response_text.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start = text.find("{")
            end = text.rfind("}")
            if start == -1 or end < start:
                raise
            parsed = json.loads(text[start:end + 1])

        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object, got {type(parsed).__name__}"
            )
        return parsed