# Clip_Image_Search/src/model/clip_model.py
# (repository-viewer residue: velmurugan1122's picture / "fix the changes" / 39bee95)
import os
import sys
src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src"))
sys.path.append(src_directory)
import logging
from transformers import AutoProcessor, CLIPModel
from database import create_pinecone_index
from data import request_method
from dotenv import load_dotenv
import torch
# NOTE: the src directory is appended to sys.path near the top of this file, before the project-local imports.
# Load environment variables (python-dotenv).
load_dotenv()
# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Both the CLIP model and its processor are created once, at import time,
# from the same pretrained checkpoint name.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CLIP_CHECKPOINT)
def get_image_embedding(image_data):
    """
    Processes an image, generates embeddings using CLIP, and indexes it in Pinecone.

    The image is fetched with ``request_method.get_urlimage``, encoded with the
    module-level CLIP ``processor``/``model``, and upserted into the
    ``image-search-dataset`` namespace of the index returned by
    ``create_pinecone_index.get_index()``.

    Args:
        image_data (dict): A dictionary containing 'photo_id' and 'photo_image_url'.

    Returns:
        str: Success or error message. Any ``Exception`` raised while validating,
            fetching, embedding, or indexing is logged and reported through the
            returned string instead of propagating to the caller.
    """
    try:
        if not isinstance(image_data, dict):
            raise ValueError("Invalid input: Expected a dictionary with 'photo_id' and 'photo_image_url'")

        photo_id = image_data.get("photo_id")
        url = image_data.get("photo_image_url")
        if not photo_id or not url:
            raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")

        # Retrieve the image from the URL
        image = request_method.get_urlimage(image_data)
        if image is None:
            raise ValueError(f"Failed to retrieve image from URL: {url}")

        # Process image and generate embeddings (inference only, so no gradients)
        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        # Flatten the feature tensor into a plain list of Python floats
        embeddings = image_features.cpu().numpy().flatten().tolist()

        # Index the embeddings in Pinecone
        pinecone_index = create_pinecone_index.get_index()
        pinecone_index.upsert(
            vectors=[
                {
                    "id": str(photo_id),
                    "values": embeddings,
                    "metadata": {
                        "url": url,
                        "photo_id": str(photo_id),
                    },
                },
            ],
            namespace="image-search-dataset",
        )
        return f"Successfully indexed image {photo_id}"
    except Exception as e:
        # image_data may not be a dict (that is exactly what the first check
        # rejects); calling .get() on it here would raise AttributeError from
        # inside the handler and hide the real error.
        if isinstance(image_data, dict):
            failed_id = image_data.get("photo_id", "Unknown")
        else:
            failed_id = "Unknown"
        # Build the message once so the log entry and the return value agree.
        message = f"Error processing image {failed_id}: {e}"
        logging.error(message)
        return message