VietCat committed on
Commit
85c10bf
·
1 Parent(s): 9637df4

update match_documents

Browse files
Files changed (1) hide show
  1. app/supabase_db.py +38 -24
app/supabase_db.py CHANGED
@@ -3,6 +3,8 @@ from postgrest.types import CountMethod
3
  from supabase.client import create_client, Client
4
  from loguru import logger
5
  import re
 
 
6
 
7
  from .utils import timing_decorator_sync
8
  from .constants import VEHICLE_KEYWORD_TO_COLUMN, VIETNAMESE_STOP_WORDS, VIETNAMESE_STOP_PHRASES
@@ -44,7 +46,7 @@ class SupabaseClient:
44
  return None
45
 
46
  @timing_decorator_sync
47
- def match_documents(self, embedding: List[float], match_count: Optional[int] = None, vehicle_keywords: Optional[List[str]] = None, user_question: str = '', min_rank_threshold: float = 0.001, rrf_k: int = 60):
48
  """
49
  Truy vấn vector similarity search qua RPC match_documents.
50
  Input: embedding (list[float]), match_count (int), vehicle_keywords (list[str] hoặc None)
@@ -74,30 +76,42 @@ class SupabaseClient:
74
  # logger.info(f"[DEBUG][RPC]: embedding: {embedding[:5]}...{embedding[-5:]}")
75
  logger.info(f"[DEBUG][RPC]: embedding: {embedding}")
76
 
77
- try:
78
- payload = {
79
- 'query_text': or_query_tsquery,
80
- 'query_embedding': embedding,
81
- 'match_count': match_count,
82
- 'min_rank_threshold': min_rank_threshold,
83
- 'vehicle_filters': None,
84
- 'rrf_k': rrf_k
85
- }
86
- if vehicle_keywords:
87
- vehicle_columns = [VEHICLE_KEYWORD_TO_COLUMN[k] for k in vehicle_keywords if k in VEHICLE_KEYWORD_TO_COLUMN]
88
- if vehicle_columns:
89
- payload['vehicle_filters'] = vehicle_columns
90
- response = self.client.rpc(
91
- 'match_documents',
92
- payload
93
- ).execute()
94
 
95
- if response.data:
96
- return response.data
97
- return []
98
- except Exception as e:
99
- logger.error(f"Error matching documents: {e}")
100
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  @timing_decorator_sync
103
  def store_embedding(self, text: str, embedding: List[float], metadata: Dict[str, Any]):
 
3
  from supabase.client import create_client, Client
4
  from loguru import logger
5
  import re
6
+ import time
7
+ import httpx
8
 
9
  from .utils import timing_decorator_sync
10
  from .constants import VEHICLE_KEYWORD_TO_COLUMN, VIETNAMESE_STOP_WORDS, VIETNAMESE_STOP_PHRASES
 
46
  return None
47
 
48
  @timing_decorator_sync
49
+ def match_documents(self, embedding: List[float], match_count: Optional[int] = None, vehicle_keywords: Optional[List[str]] = None, user_question: str = '', keyword_threshold: float = 0.01, vector_threshold: float = 0.3, rrf_k: int = 60):
50
  """
51
  Truy vấn vector similarity search qua RPC match_documents.
52
  Input: embedding (list[float]), match_count (int), vehicle_keywords (list[str] hoặc None)
 
76
  # logger.info(f"[DEBUG][RPC]: embedding: {embedding[:5]}...{embedding[-5:]}")
77
  logger.info(f"[DEBUG][RPC]: embedding: {embedding}")
78
 
79
+ payload = {
80
+ 'query_text': or_query_tsquery,
81
+ 'query_embedding': embedding,
82
+ 'match_count': match_count,
83
+ 'keyword_threshold': keyword_threshold,
84
+ 'vector_threshold': vector_threshold,
85
+ 'vehicle_filters': None,
86
+ 'rrf_k': rrf_k
87
+ }
88
+ if vehicle_keywords:
89
+ vehicle_columns = [VEHICLE_KEYWORD_TO_COLUMN[k] for k in vehicle_keywords if k in VEHICLE_KEYWORD_TO_COLUMN]
90
+ if vehicle_columns:
91
+ payload['vehicle_filters'] = vehicle_columns
 
 
 
 
92
 
93
+ max_retries = 3
94
+ for attempt in range(max_retries):
95
+ try:
96
+ response = self.client.rpc(
97
+ 'match_documents',
98
+ payload
99
+ ).execute()
100
+
101
+ if response.data:
102
+ return response.data
103
+ return []
104
+ except httpx.TimeoutException:
105
+ logger.warning(f"Supabase RPC 'match_documents' timeout on attempt {attempt + 1}/{max_retries}. Retrying...")
106
+ if attempt == max_retries - 1:
107
+ logger.error(f"Supabase RPC failed after {max_retries} attempts due to timeout.")
108
+ return []
109
+ time.sleep(1 * (2 ** attempt)) # Exponential backoff: 1s, then 2s (the final attempt returns before sleeping)
110
+ except Exception as e:
111
+ logger.error(f"Error matching documents: {e}")
112
+ return []
113
+
114
+ return [] # Fallback in case loop finishes without returning
115
 
116
  @timing_decorator_sync
117
  def store_embedding(self, text: str, embedding: List[float], metadata: Dict[str, Any]):