import os
import sys

# Add the src directory to the import path; this must run before the
# project-local imports (`database`, `data`) below.
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
sys.path.append(src_directory)

import logging
from transformers import AutoProcessor, CLIPModel
from database import create_pinecone_index
from data import request_method
from dotenv import load_dotenv
import torch

# Load environment variables
load_dotenv()
# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Load CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")


def get_image_embedding(image_data):
    """
    Processes an image, generates embeddings using CLIP, and indexes it in Pinecone.

    The image is fetched via ``request_method.get_urlimage``, embedded with the
    module-level CLIP ``model``/``processor``, and upserted into the
    "image-search-dataset" namespace of the index returned by
    ``create_pinecone_index.get_index()``. The vector id is ``str(photo_id)``
    and the metadata carries the URL and the photo id.

    Args:
        image_data (dict): A dictionary containing 'photo_id' and 'photo_image_url'.

    Returns:
        str: Success or error message. Failures (invalid input, download
        errors, model or Pinecone errors) are logged and reported through the
        returned string rather than raised.
    """
    try:
        if not isinstance(image_data, dict):
            raise ValueError("Invalid input: Expected a dictionary with 'photo_id' and 'photo_image_url'")

        photo_id = image_data.get("photo_id")
        url = image_data.get("photo_image_url")

        if not photo_id or not url:
            raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")

        # Retrieve the image from the URL (the helper receives the whole dict).
        image = request_method.get_urlimage(image_data)
        if image is None:
            raise ValueError(f"Failed to retrieve image from URL: {url}")

        # Process image and generate embeddings; no gradients are needed for
        # inference.
        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        embeddings = image_features.cpu().numpy().flatten().tolist()

        # Index the embeddings in Pinecone
        pinecone_index = create_pinecone_index.get_index()
        pinecone_index.upsert(
            vectors=[
                {
                    "id": str(photo_id),
                    "values": embeddings,
                    "metadata": {
                        "url": url,
                        "photo_id": str(photo_id),
                    },
                },
            ],
            namespace="image-search-dataset",
        )

        return f"Successfully indexed image {photo_id}"

    except Exception as e:
        # image_data may not be a dict (that is one of the errors handled
        # here), so guard the lookup: calling .get() on a non-dict would raise
        # a second exception from inside this handler.
        if isinstance(image_data, dict):
            photo_label = image_data.get("photo_id", "Unknown")
        else:
            photo_label = "Unknown"
        message = f"Error processing image {photo_label}: {e}"
        logging.error(message)
        return message