|
|
import os |
|
|
from pinecone import Pinecone |
|
|
from dotenv import load_dotenv |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
|
|
|
# Fixed identifiers for the Pinecone index and namespace used by this module.
INDEX_NAME = "unsplash-index-session"
NAMESPACE = "image-search-dataset"

# Load environment configuration (python-dotenv) before reading the API key.
load_dotenv()

# The API key is mandatory: fail at import time when it is unset or empty
# instead of deferring the failure to the first query.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is missing! Check your .env file.")

# Module-wide client and index handle, created once and reused by every search.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)
|
|
|
|
|
def search_similar_images(query_embedding: List[float], top_k: int = 10) -> List[Dict[str, Any]]:
    """Search for similar images in Pinecone using the given embedding.

    Queries the module-level ``index`` within ``NAMESPACE`` (metadata
    included) and reduces every match to a plain dictionary.

    Args:
        query_embedding: Query vector forwarded to ``index.query``.
        top_k: Maximum number of matches to request. Defaults to 10.

    Returns:
        One dict per match with the keys ``"id"``, ``"score"`` (converted to
        ``float``) and ``"url"`` (taken from the match metadata, ``""`` when
        the metadata has no ``"url"`` entry or the match has no metadata).
        An empty list is returned when the query or the result processing
        fails; the error is printed instead of being raised.
    """
    try:
        results = index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True,
            namespace=NAMESPACE,
        )

        cleaned_results = []
        # ``or []`` also covers a response whose "matches" entry is None.
        for match in results.get("matches") or []:
            # A match may carry ``metadata=None``. Without the fallback the
            # ``.get`` below would raise, and the broad handler would throw
            # away every other valid match as well.
            metadata = match.get("metadata") or {}
            cleaned_results.append({
                "id": match["id"],
                "score": float(match["score"]),
                "url": metadata.get("url", ""),
            })

        return cleaned_results

    except Exception as e:
        # Deliberate best-effort: callers get an empty result, not an exception.
        print(f"❌ Error querying Pinecone: {e}")
        return []