Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| src_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..", "src")) | |
| sys.path.append(src_directory) | |
| import logging | |
| from transformers import AutoProcessor, CLIPModel | |
| from database import create_pinecone_index | |
| from data import request_method | |
| from dotenv import load_dotenv | |
| import torch | |
# Load environment variables (from a .env file, if one is present).
load_dotenv()

# No access token is passed to from_pretrained below; the lookup is kept
# here, disabled, for reference.
# HF_ACCESS_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# The CLIP weights and the matching input processor are loaded once at import
# time from the same checkpoint, so every call to get_image_embedding reuses
# them.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CLIP_CHECKPOINT)
def get_image_embedding(image_data):
    """
    Processes an image, generates embeddings using CLIP, and indexes it in Pinecone.

    Any failure (invalid input, image download, model inference, Pinecone
    upsert) is caught, logged, and reported through the returned string
    instead of being raised to the caller.

    Args:
        image_data (dict): A dictionary containing 'photo_id' and 'photo_image_url'.

    Returns:
        str: Success or error message.
    """
    # Work out how to name this image in error messages *before* entering the
    # try block. The error path must not call .get() on a non-dict input,
    # otherwise the handler itself raises AttributeError and the function
    # crashes instead of returning its error string.
    if isinstance(image_data, dict):
        image_label = image_data.get("photo_id", "Unknown")
    else:
        image_label = "Unknown"

    try:
        if not isinstance(image_data, dict):
            raise ValueError("Invalid input: Expected a dictionary with 'photo_id' and 'photo_image_url'")

        photo_id = image_data.get("photo_id")
        url = image_data.get("photo_image_url")
        if not photo_id or not url:
            raise ValueError("Missing 'photo_id' or 'photo_image_url' in input data")

        # Retrieve the image from the URL. Note that the helper is handed the
        # whole record, not just the URL string.
        image = request_method.get_urlimage(image_data)
        if image is None:
            raise ValueError(f"Failed to retrieve image from URL: {url}")

        # Process image and generate embeddings; inference only, so gradient
        # tracking is switched off.
        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)

        # Collapse the feature tensor into a flat Python list for the upsert
        # payload.
        embeddings = image_features.cpu().numpy().flatten().tolist()

        # Index the embeddings in Pinecone, keyed by the stringified photo id.
        pinecone_index = create_pinecone_index.get_index()
        pinecone_index.upsert(
            vectors=[
                {
                    "id": str(photo_id),
                    "values": embeddings,
                    "metadata": {
                        "url": url,
                        "photo_id": str(photo_id)
                    }
                },
            ],
            namespace="image-search-dataset"
        )
        return f"Successfully indexed image {photo_id}"
    except Exception as e:
        # Deliberate catch-all: callers rely on getting a status string back
        # rather than an exception. Log and return the same message.
        message = f"Error processing image {image_label}: {e}"
        logging.error(message)
        return message