Jake-seong committed on
Commit
f17c04f
·
verified ·
1 Parent(s): 814027a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -37
app.py CHANGED
@@ -42,6 +42,10 @@ def get_text_value(node, field_name):
42
  return node[field_name]
43
  return None
44
 
 
 
 
 
45
  def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
46
  """
47
  다중 임베딩된 채팅 데이터에서 유사한 콘텐츠를 검색합니다.
@@ -71,21 +75,24 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
71
  query_embedding = normalize(raw_embedding.reshape(1, -1), norm='l2')[0]
72
  print(f"임베딩 정규화 전/후 첫 5개 요소: {raw_embedding[:5]} -> {query_embedding[:5]}")
73
 
 
 
 
74
  # DB 연결
75
  conn = get_db_conn()
76
  register_vector(conn)
77
 
78
- # 여러 필드를 가중치로 조합한 유사도 검색 SQL - 매개변수화된 쿼리 사용
79
- sql = """
80
  WITH embeddings AS (
81
  SELECT
82
  id,
83
  metadata,
84
  content,
85
- CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> %s::vector) ELSE 0 END * %s as full_sim,
86
- CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> %s::vector) ELSE 0 END * %s as topic_sim,
87
- CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> %s::vector) ELSE 0 END * %s as customer_sim,
88
- CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> %s::vector) ELSE 0 END * %s as agent_sim
89
  FROM vector_store_multi_embeddings
90
  WHERE full_embedding IS NOT NULL
91
  OR topic_embedding IS NOT NULL
@@ -99,20 +106,13 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
99
  (full_sim + topic_sim + customer_sim + agent_sim) as combined_similarity
100
  FROM embeddings
101
  ORDER BY combined_similarity DESC
102
- LIMIT %s
103
  """
104
 
105
  with conn.cursor() as cur:
106
- # 매개변수화된 쿼리 실행
107
- params = (
108
- query_embedding, full_w,
109
- query_embedding, topic_w,
110
- query_embedding, customer_w,
111
- query_embedding, agent_w,
112
- limit
113
- )
114
- print(f"쿼리 실행 - 파라미터: 가중치 설정={full_w}, {topic_w}, {customer_w}, {agent_w}, 결과 제한={limit}")
115
- cur.execute(sql, params)
116
  rows = cur.fetchall()
117
 
118
  print(f"검색 결과: 총 {len(rows)}개 데이터 조회됨")
@@ -226,21 +226,24 @@ def search_similar_chat_by_date(
226
  query_embedding = normalize(raw_embedding.reshape(1, -1), norm='l2')[0]
227
  print(f"날짜 검색 - 임베딩 정규화 전/후 첫 5개 요소: {raw_embedding[:5]} -> {query_embedding[:5]}")
228
 
 
 
 
229
  # DB 연결
230
  conn = get_db_conn()
231
  register_vector(conn)
232
 
233
- # 여러 필드를 가중치로 조합한 유사도 검색 SQL - 매개변수화
234
- sql = """
235
  WITH embeddings AS (
236
  SELECT
237
  id,
238
  metadata,
239
  content,
240
- CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> %s::vector) ELSE 0 END * %s as full_sim,
241
- CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> %s::vector) ELSE 0 END * %s as topic_sim,
242
- CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> %s::vector) ELSE 0 END * %s as customer_sim,
243
- CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> %s::vector) ELSE 0 END * %s as agent_sim
244
  FROM vector_store_multi_embeddings
245
  WHERE full_embedding IS NOT NULL
246
  OR topic_embedding IS NOT NULL
@@ -248,21 +251,12 @@ def search_similar_chat_by_date(
248
  OR agent_embedding IS NOT NULL
249
  """
250
 
251
- params = [
252
- query_embedding, full_w,
253
- query_embedding, topic_w,
254
- query_embedding, customer_w,
255
- query_embedding, agent_w
256
- ]
257
-
258
  # 날짜 필터 추가
259
  if start_timestamp is not None:
260
- sql += " AND (metadata->>'startTime')::bigint >= %s"
261
- params.append(start_timestamp)
262
 
263
  if end_timestamp is not None:
264
- sql += " AND (metadata->>'startTime')::bigint <= %s"
265
- params.append(end_timestamp)
266
 
267
  sql += """
268
  )
@@ -276,11 +270,10 @@ def search_similar_chat_by_date(
276
  LIMIT %s
277
  """
278
 
279
- params.append(limit)
280
-
281
  with conn.cursor() as cur:
282
  print(f"날짜 검색 쿼리 실행: 시작일={start_date}({start_timestamp}), 종료일={end_date}({end_timestamp})")
283
- cur.execute(sql, tuple(params))
 
284
  rows = cur.fetchall()
285
 
286
  print(f"날짜 필터링 검색 결과: 총 {len(rows)}개 데이터 조회됨")
 
42
  return node[field_name]
43
  return None
44
 
45
def format_vector_for_pg(vector: List[float]) -> str:
    """Render a numeric vector as a PostgreSQL/pgvector text literal.

    The returned string has the form ``[v1,v2,...]`` with no spaces. Callers
    in this file interpolate it directly into SQL text
    (``'{query_vector}'::vector``), so every element is forced through
    ``float()`` first: the output can then contain only characters of a
    float literal, never quotes or other SQL syntax.

    Args:
        vector: Iterable of numbers (plain floats/ints or NumPy scalars).

    Returns:
        The comma-joined literal, e.g. ``"[0.5,0.25]"``; ``"[]"`` for an
        empty input.

    Raises:
        ValueError: If an element cannot be parsed as a number, or is NaN
            or infinite.
        TypeError: If an element is of a type ``float()`` does not accept.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level import list.
    import math

    components = []
    for position, value in enumerate(vector):
        number = float(value)
        if not math.isfinite(number):
            # NOTE(review): pgvector is expected to reject NaN/Infinity
            # components; failing here gives a clearer error than a DB
            # round-trip would.
            raise ValueError(
                f"vector element {position} is not a finite number: {value!r}"
            )
        # repr() of a Python float round-trips exactly and is locale-independent.
        components.append(repr(number))
    return f"[{','.join(components)}]"
48
+
49
  def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
50
  """
51
  다중 임베딩된 채팅 데이터에서 유사한 콘텐츠를 검색합니다.
 
75
  query_embedding = normalize(raw_embedding.reshape(1, -1), norm='l2')[0]
76
  print(f"임베딩 정규화 전/후 첫 5개 요소: {raw_embedding[:5]} -> {query_embedding[:5]}")
77
 
78
+ # Java 방식: 벡터를 문자열로 변환
79
+ query_vector = format_vector_for_pg(query_embedding)
80
+
81
  # DB 연결
82
  conn = get_db_conn()
83
  register_vector(conn)
84
 
85
+ # Java 방식: 문자열 포맷팅 사용한 SQL 쿼리
86
+ sql = f"""
87
  WITH embeddings AS (
88
  SELECT
89
  id,
90
  metadata,
91
  content,
92
+ CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '{query_vector}'::vector) ELSE 0 END * {full_w} as full_sim,
93
+ CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '{query_vector}'::vector) ELSE 0 END * {topic_w} as topic_sim,
94
+ CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '{query_vector}'::vector) ELSE 0 END * {customer_w} as customer_sim,
95
+ CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '{query_vector}'::vector) ELSE 0 END * {agent_w} as agent_sim
96
  FROM vector_store_multi_embeddings
97
  WHERE full_embedding IS NOT NULL
98
  OR topic_embedding IS NOT NULL
 
106
  (full_sim + topic_sim + customer_sim + agent_sim) as combined_similarity
107
  FROM embeddings
108
  ORDER BY combined_similarity DESC
109
+ LIMIT {limit}
110
  """
111
 
112
  with conn.cursor() as cur:
113
+ print(f"쿼리 실행 - Java 방식 포맷팅, 가중치 설정={full_w}, {topic_w}, {customer_w}, {agent_w}, 결과 제한={limit}")
114
+ # Java 방식: 매개변수 없이 직접 쿼리 실행
115
+ cur.execute(sql)
 
 
 
 
 
 
 
116
  rows = cur.fetchall()
117
 
118
  print(f"검색 결과: 총 {len(rows)}개 데이터 조회됨")
 
226
  query_embedding = normalize(raw_embedding.reshape(1, -1), norm='l2')[0]
227
  print(f"날짜 검색 - 임베딩 정규화 전/후 첫 5개 요소: {raw_embedding[:5]} -> {query_embedding[:5]}")
228
 
229
+ # Java 방식: 벡터를 문자열로 변환
230
+ query_vector = format_vector_for_pg(query_embedding)
231
+
232
  # DB 연결
233
  conn = get_db_conn()
234
  register_vector(conn)
235
 
236
+ # Java 방식: 문자열 포맷팅 사용한 SQL 쿼리 시작
237
+ sql = f"""
238
  WITH embeddings AS (
239
  SELECT
240
  id,
241
  metadata,
242
  content,
243
+ CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '{query_vector}'::vector) ELSE 0 END * {full_w} as full_sim,
244
+ CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '{query_vector}'::vector) ELSE 0 END * {topic_w} as topic_sim,
245
+ CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '{query_vector}'::vector) ELSE 0 END * {customer_w} as customer_sim,
246
+ CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '{query_vector}'::vector) ELSE 0 END * {agent_w} as agent_sim
247
  FROM vector_store_multi_embeddings
248
  WHERE full_embedding IS NOT NULL
249
  OR topic_embedding IS NOT NULL
 
251
  OR agent_embedding IS NOT NULL
252
  """
253
 
 
 
 
 
 
 
 
254
  # 날짜 필터 추가
255
  if start_timestamp is not None:
256
+ sql += f" AND (metadata->>'startTime')::bigint >= {start_timestamp}"
 
257
 
258
  if end_timestamp is not None:
259
+ sql += f" AND (metadata->>'startTime')::bigint <= {end_timestamp}"
 
260
 
261
  sql += """
262
  )
 
270
  LIMIT %s
271
  """
272
 
 
 
273
  with conn.cursor() as cur:
274
  print(f"날짜 검색 쿼리 실행: 시작일={start_date}({start_timestamp}), 종료일={end_date}({end_timestamp})")
275
+ # 여기서는 limit만 매개변수로 전달
276
+ cur.execute(sql, (limit,))
277
  rows = cur.fetchall()
278
 
279
  print(f"날짜 필터링 검색 결과: 총 {len(rows)}개 데이터 조회됨")