File size: 2,565 Bytes
2fc8802
 
39bee95
 
 
 
2fc8802
 
 
 
 
39bee95
2fc8802
39bee95
2fc8802
39bee95
2fc8802
39bee95
2fc8802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39bee95
2fc8802
39bee95
 
 
 
2fc8802
39bee95
 
 
2fc8802
39bee95
2fc8802
 
 
 
39bee95
2fc8802
 
 
39bee95
2fc8802
 
 
 
 
 
 
 
 
39bee95
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import sys

src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
sys.path.append(src_directory)

import logging
from transformers import AutoProcessor, CLIPModel
from database import create_pinecone_index
from data import request_method
from dotenv import load_dotenv
import torch

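# NOTE: the src directory is appended to sys.path near the top of this file,
# before the project-local imports (database, data) that depend on it.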

# Load environment variables (python-dotenv's load_dotenv, imported above).
load_dotenv()
# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# The CLIP weights and the matching input processor are both loaded from the
# same checkpoint, once, at import time; get_image_embedding reuses them.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
processor = AutoProcessor.from_pretrained(_CLIP_CHECKPOINT)
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)

def get_image_embedding(image_data):
    """
    Embeds one image with CLIP and upserts the resulting vector into Pinecone.

    Despite its name, this function does not return the embedding. It always
    returns a status string: every failure, including invalid input, is
    logged and reported through that string instead of being raised.

    Args:
        image_data (dict): A dictionary containing 'photo_id' and
            'photo_image_url'. Both values must be truthy.

    Returns:
        str: "Successfully indexed image <photo_id>" on success, otherwise
        "Error processing image <photo_id or 'Unknown'>: <reason>".
    """
    # Resolve the id used in error messages before entering the try block, so
    # the error handler never has to call .get() on a non-dict argument (which
    # would raise AttributeError from inside the handler and escape the
    # function instead of producing an error message).
    if isinstance(image_data, dict):
        error_label = image_data.get("photo_id", "Unknown")
    else:
        error_label = "Unknown"

    try:
        if not isinstance(image_data, dict):
            raise ValueError("Invalid input: Expected a dictionary with 'photo_id' and 'photo_image_url'")

        photo_id = image_data.get("photo_id")
        url = image_data.get("photo_image_url")

        if not photo_id or not url:
            raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")

        # Retrieve the image from the URL.
        # NOTE(review): the helper is handed the whole record, not just the
        # URL - presumably it reads 'photo_image_url' itself; verify against
        # request_method.get_urlimage.
        image = request_method.get_urlimage(image_data)
        if image is None:
            raise ValueError(f"Failed to retrieve image from URL: {url}")

        # Process image and generate embeddings; no_grad because this is
        # inference only and gradients are never needed.
        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        # Flatten to a plain list of Python floats for the upsert payload.
        embeddings = image_features.cpu().numpy().flatten().tolist()

        # Index the embeddings in Pinecone, keyed by the stringified photo id.
        pinecone_index = create_pinecone_index.get_index()
        pinecone_index.upsert(
            vectors=[
                {
                    "id": str(photo_id),
                    "values": embeddings,
                    "metadata": {
                        "url": url,
                        "photo_id": str(photo_id)
                    }
                },
            ],
            namespace="image-search-dataset"
        )

        return f"Successfully indexed image {photo_id}"

    except Exception as e:
        # Deliberate catch-all: callers rely on a status string, not on
        # exceptions. Build the message once so the log line and the return
        # value cannot drift apart.
        message = f"Error processing image {error_label}: {e}"
        logging.error(message)
        return message