AdarshDRC committed on
Commit
0df58f0
·
verified ·
1 Parent(s): c96096b

Update src/cloud_db.py

Browse files
Files changed (1) hide show
  1. src/cloud_db.py +38 -28
src/cloud_db.py CHANGED
@@ -4,55 +4,65 @@ import cloudinary.uploader
4
  from pinecone import Pinecone
5
  from dotenv import load_dotenv
6
 
7
- # Load keys from the .env file
8
  load_dotenv()
9
 
10
  class CloudDB:
11
  def __init__(self):
12
- # 1. Connect to Cloudinary
13
  cloudinary.config(
14
  cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
15
  api_key=os.getenv("CLOUDINARY_API_KEY"),
16
  api_secret=os.getenv("CLOUDINARY_API_SECRET")
17
  )
18
 
19
- # 2. Connect to Pinecone
20
  self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
21
- self.index = self.pc.Index(os.getenv("PINECONE_INDEX_NAME"))
 
 
22
 
23
  def upload_image(self, file_path, folder_name="visual_search"):
24
- """Uploads an image to Cloudinary and returns the public URL."""
25
  response = cloudinary.uploader.upload(file_path, folder=folder_name)
26
  return response['secure_url']
27
 
28
- def add_vector(self, vector, image_url, image_id):
29
- """Saves the vector and the image URL to Pinecone."""
30
- # Convert numpy array to list for Pinecone
31
- vector_list = vector.tolist() if hasattr(vector, 'tolist') else vector
32
 
33
- self.index.upsert(vectors=[{
34
  "id": image_id,
35
  "values": vector_list,
36
  "metadata": {"image_url": image_url}
37
- }])
38
 
39
- def search(self, query_vector, top_k=10, min_score=0.60): # <-- CHANGED baseline to 0.60
40
- """Searches Pinecone and filters out baseline 'random noise' matches."""
41
- vector_list = query_vector.tolist() if hasattr(query_vector, 'tolist') else query_vector
42
-
43
- response = self.index.query(
44
- vector=vector_list,
45
- top_k=top_k,
46
- include_metadata=True
47
- )
48
-
49
  results = []
50
- for match in response['matches']:
51
- # Only keep the image if it's an ACTUAL mathematical match (60% or higher)
52
- if match['score'] >= min_score:
53
- results.append({
54
- "url": match['metadata']['image_url'],
55
- "score": match['score']
56
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  return results
 
4
  from pinecone import Pinecone
5
  from dotenv import load_dotenv
6
 
 
7
  load_dotenv()
8
 
class CloudDB:
    """Wrapper around Cloudinary image uploads and two Pinecone indexes.

    Vectors are routed to one of two indexes according to the ``"type"``
    entry of the dict passed in: ``"face"`` goes to the faces index and
    every other value goes to the objects index.
    """

    # A face match is kept when its raw score is at or above this value.
    FACE_RAW_THRESHOLD = 0.35
    # Kept face scores are rescaled linearly: a raw score equal to the
    # threshold maps to FACE_UI_FLOOR, a raw score of 1.0 maps to
    # FACE_UI_FLOOR + FACE_UI_SPAN, and the result is capped at FACE_UI_CAP.
    FACE_UI_FLOOR = 0.75
    FACE_UI_SPAN = 0.24
    FACE_UI_CAP = 0.99

    FACE_CAPTION = "👤 Verified Identity Match"
    OBJECT_CAPTION = "🎯 Visual & Semantic Match"

    def __init__(self, faces_index_name="enterprise-faces",
                 objects_index_name="enterprise-objects"):
        """Configure Cloudinary and open both Pinecone indexes.

        Credentials are read from the environment variables
        ``CLOUDINARY_CLOUD_NAME``, ``CLOUDINARY_API_KEY``,
        ``CLOUDINARY_API_SECRET`` and ``PINECONE_API_KEY``.

        Args:
            faces_index_name: Name of the Pinecone index used for "face"
                vectors.
            objects_index_name: Name of the Pinecone index used for every
                other vector type.
        """
        cloudinary.config(
            cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
            api_key=os.getenv("CLOUDINARY_API_KEY"),
            api_secret=os.getenv("CLOUDINARY_API_SECRET"),
        )

        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_faces = self.pc.Index(faces_index_name)
        self.index_objects = self.pc.Index(objects_index_name)

    @staticmethod
    def _as_list(vector):
        """Return ``vector.tolist()`` when available, else ``vector`` as is."""
        return vector.tolist() if hasattr(vector, "tolist") else vector

    @staticmethod
    def _image_url(match):
        """Return the match's ``metadata['image_url']`` or None if absent."""
        try:
            return match["metadata"]["image_url"]
        except (KeyError, TypeError, AttributeError):
            # Metadata missing, None, or without an image_url entry.
            return None

    @classmethod
    def _face_ui_score(cls, raw_score):
        """Rescale a raw face score at/above the threshold for display."""
        scaled = cls.FACE_UI_FLOOR + (
            (raw_score - cls.FACE_RAW_THRESHOLD)
            / (1.0 - cls.FACE_RAW_THRESHOLD)
        ) * cls.FACE_UI_SPAN
        return min(cls.FACE_UI_CAP, scaled)

    def _index_for(self, kind):
        """Return the faces index for ``"face"``, else the objects index."""
        return self.index_faces if kind == "face" else self.index_objects

    def upload_image(self, file_path, folder_name="visual_search"):
        """Upload ``file_path`` to Cloudinary and return its ``secure_url``.

        Args:
            file_path: File handed to ``cloudinary.uploader.upload``.
            folder_name: Value passed as the upload's ``folder`` argument.
        """
        response = cloudinary.uploader.upload(file_path, folder=folder_name)
        return response["secure_url"]

    def add_vector(self, data_dict, image_url, image_id):
        """Upsert one vector, with its image URL as metadata.

        Args:
            data_dict: Dict with a ``"vector"`` entry (a list, or any object
                exposing ``tolist()``) and a ``"type"`` entry. ``"face"``
                selects the faces index; anything else the objects index.
            image_url: Stored under the ``image_url`` metadata key.
            image_id: Used as the vector id.

        Raises:
            KeyError: If ``data_dict`` lacks ``"vector"`` or ``"type"``.
        """
        payload = [{
            "id": image_id,
            "values": self._as_list(data_dict["vector"]),
            "metadata": {"image_url": image_url},
        }]
        self._index_for(data_dict["type"]).upsert(vectors=payload)

    def search(self, query_dict, top_k=10, min_score=0.45):
        """Query the index selected by ``query_dict["type"]``.

        Face queries keep matches whose raw score is at least
        ``FACE_RAW_THRESHOLD`` and report a rescaled score; ``min_score`` is
        not applied to them. All other queries keep matches whose score is
        at least ``min_score`` and report the raw score.

        Matches without an ``image_url`` in their metadata are skipped rather
        than aborting the whole search, since they cannot be displayed.

        Args:
            query_dict: Dict with ``"vector"`` and ``"type"`` entries, as for
                ``add_vector``.
            top_k: Passed to the index query as ``top_k``.
            min_score: Minimum score for non-face matches.

        Returns:
            A list of dicts with keys ``"url"``, ``"score"`` and
            ``"caption"``, in the order the index returned the matches.

        Raises:
            KeyError: If ``query_dict`` lacks ``"vector"`` or ``"type"``.
        """
        vector_list = self._as_list(query_dict["vector"])
        is_face = query_dict["type"] == "face"

        response = self._index_for(query_dict["type"]).query(
            vector=vector_list, top_k=top_k, include_metadata=True
        )

        threshold = self.FACE_RAW_THRESHOLD if is_face else min_score
        caption = self.FACE_CAPTION if is_face else self.OBJECT_CAPTION

        results = []
        for match in response["matches"]:
            raw_score = match["score"]
            if not raw_score >= threshold:
                continue

            url = self._image_url(match)
            if url is None:
                continue

            results.append({
                "url": url,
                "score": self._face_ui_score(raw_score) if is_face else raw_score,
                "caption": caption,
            })

        return results