Spaces:

velmurugan1122
/

Clip_Image_Search

Sleeping

File size: 2,565 Bytes

2fc8802
 
39bee95
 
 
 
2fc8802
 
 
 
 
39bee95
2fc8802
39bee95
2fc8802
39bee95
2fc8802
39bee95
2fc8802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39bee95
2fc8802
39bee95
 
 
 
2fc8802
39bee95
 
 
2fc8802
39bee95
2fc8802
 
 
 
39bee95
2fc8802
 
 
39bee95
2fc8802
 
 
 
 
 
 
 
 
39bee95

import os
import sys

# Add the project's "src" directory (resolved as ../../src relative to this
# file) to the import path. This has to run before the `database` and `data`
# imports below; presumably those packages live under src/ -- confirm against
# the repository layout.
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
sys.path.append(src_directory)

import logging
from transformers import AutoProcessor, CLIPModel
from database import create_pinecone_index
from data import request_method
from dotenv import load_dotenv
import torch

# Load environment variables
load_dotenv()
# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Load CLIP model and processor once at import time; get_image_embedding()
# below reuses these module-level objects on every call.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

def get_image_embedding(image_data):
    """
    Embed one image with CLIP and upsert the resulting vector into Pinecone.

    The image is fetched through ``request_method.get_urlimage``, run through
    the module-level CLIP ``processor`` and ``model``, flattened to a plain
    list of floats, and upserted into the ``"image-search-dataset"`` namespace
    with the photo ID and URL as metadata.

    Args:
        image_data (dict): A dictionary containing 'photo_id' and
            'photo_image_url'. Both values must be truthy.

    Returns:
        str: ``"Successfully indexed image <photo_id>"`` on success, otherwise
        ``"Error processing image <photo_id or 'Unknown'>: <reason>"``.
        Failures are logged and reported through the return value instead of
        being raised, including when ``image_data`` is not a dictionary.
    """
    try:
        if not isinstance(image_data, dict):
            raise ValueError("Invalid input: Expected a dictionary with 'photo_id' and 'photo_image_url'")

        photo_id = image_data.get("photo_id")
        url = image_data.get("photo_image_url")

        if not photo_id or not url:
            raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")

        # Retrieve the image from the URL (the helper receives the whole dict).
        image = request_method.get_urlimage(image_data)
        if image is None:
            raise ValueError(f"Failed to retrieve image from URL: {url}")

        # Process image and generate embeddings; no_grad avoids building an
        # autograd graph since this is inference only.
        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        # Convert the feature tensor into a flat list of Python floats.
        embeddings = image_features.cpu().numpy().flatten().tolist()

        # Index the embeddings in Pinecone
        pinecone_index = create_pinecone_index.get_index()
        pinecone_index.upsert(
            vectors=[
                {
                    "id": str(photo_id),
                    "values": embeddings,
                    "metadata": {
                        "url": url,
                        "photo_id": str(photo_id),
                    },
                },
            ],
            namespace="image-search-dataset",
        )

        return f"Successfully indexed image {photo_id}"

    except Exception as e:
        # image_data may be the very thing that failed validation (None, a
        # list, ...), so only call .get() on it when it really is a dict;
        # otherwise the handler itself would raise AttributeError.
        if isinstance(image_data, dict):
            photo_label = image_data.get("photo_id", "Unknown")
        else:
            photo_label = "Unknown"
        message = f"Error processing image {photo_label}: {e}"
        logging.error(message)
        return message