Jake-seong committed on
Commit
401dcc6
·
verified ·
1 Parent(s): 21f7ac0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -78
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  from typing import List, Dict, Tuple, Any
7
  from pgvector.psycopg2 import register_vector
8
  import numpy as np
9
- from datetime import datetime
10
 
11
  # κ°€μ€‘μΉ˜ 및 μž„κ³„κ°’ μ„€μ •
12
  DEFAULT_FULL_WEIGHT = 0.2
@@ -15,8 +15,12 @@ DEFAULT_CUSTOMER_WEIGHT = 0.2
15
  DEFAULT_AGENT_WEIGHT = 0.1
16
  DEFAULT_SIMILARITY_THRESHOLD = 0
17
 
 
 
 
18
  # DB μ—°κ²° μ„€μ •
19
  def get_db_conn():
 
20
  return psycopg2.connect(
21
  host=os.environ["VECTOR_HOST"],
22
  port=5432,
@@ -25,32 +29,49 @@ def get_db_conn():
25
  password=os.environ["VECTOR_SECRET"]
26
  )
27
 
28
- client = OpenAI()
29
-
30
  def get_embedding(text: str) -> List[float]:
31
- """ν…μŠ€νŠΈλ₯Ό μž„λ² λ”© λ²‘ν„°λ‘œ λ³€ν™˜ν•©λ‹ˆλ‹€."""
32
- response = client.embeddings.create(
33
- input=text,
34
- model="text-embedding-3-small",
35
- encoding_format="float" # λͺ…μ‹œμ μœΌλ‘œ float ν˜•μ‹ μ§€μ •
36
- )
37
- return response.data[0].embedding
38
-
39
- def get_text_value(node, field_name):
40
- """JSON λ…Έλ“œμ—μ„œ ν…μŠ€νŠΈ 값을 μ•ˆμ „ν•˜κ²Œ μΆ”μΆœν•©λ‹ˆλ‹€."""
41
- if node and field_name in node and node[field_name] is not None:
42
- return node[field_name]
43
- return None
 
 
 
 
 
 
 
44
 
45
  def format_vector_for_pg(vector: List[float]) -> str:
46
- """벑터λ₯Ό PostgreSQL 포맷으둜 λ³€ν™˜ν•©λ‹ˆλ‹€."""
47
- # 정밀도 μœ μ§€λ₯Ό μœ„ν•΄ str() ν•¨μˆ˜ λŒ€μ‹  μ†Œμˆ˜μ  μ œν•œ 없이 λ°”λ‘œ join
 
 
 
48
  vector_str = ','.join([f"{x}" for x in vector])
49
  return f"[{vector_str}]"
50
 
 
 
 
 
 
 
 
 
 
51
  def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
52
  """
53
- 닀쀑 μž„λ² λ”©λœ μ±„νŒ… λ°μ΄ν„°μ—μ„œ μœ μ‚¬ν•œ μ½˜ν…μΈ λ₯Ό κ²€μƒ‰ν•©λ‹ˆλ‹€.
54
 
55
  Args:
56
  query (str): 검색할 쿼리 ν…μŠ€νŠΈ
@@ -61,7 +82,7 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
61
  """
62
  limit = max_results if max_results is not None else 100
63
 
64
- # κ°€μ€‘μΉ˜ μ„€μ •
65
  full_w = DEFAULT_FULL_WEIGHT
66
  topic_w = DEFAULT_TOPIC_WEIGHT
67
  customer_w = DEFAULT_CUSTOMER_WEIGHT
@@ -71,28 +92,27 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
71
  print(f"닀쀑 μž„λ² λ”© 검색 μ‹œμž‘: 쿼리='{query}', κ°€μ€‘μΉ˜=(full={full_w}, topic={topic_w}, customer={customer_w}, agent={agent_w}), μ΅œλŒ€ κ²°κ³Ό={limit}")
72
 
73
  try:
74
- # 쿼리 μž„λ² λ”© 생성 - 인코딩 포맷 λͺ…μ‹œ
75
  query_embedding = get_embedding(query)
76
- print(f"μž„λ² λ”© 생성 μ™„λ£Œ: 벑터 길이={len(query_embedding)}")
77
 
78
- # 벑터 포맷 λ³€ν™˜
79
  query_vector = format_vector_for_pg(query_embedding)
80
 
81
  # DB μ—°κ²°
82
  conn = get_db_conn()
83
  register_vector(conn)
84
 
85
- # μžλ°” κ΅¬ν˜„κ³Ό μΌμΉ˜ν•˜λ„λ‘ SQL 쿼리 μˆ˜μ •
86
- sql = f"""
87
- WITH similarities AS (
88
  SELECT
89
  id,
90
  metadata,
91
  content,
92
- CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '{query_vector}'::vector) ELSE 0 END * {full_w} as full_sim,
93
- CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '{query_vector}'::vector) ELSE 0 END * {topic_w} as topic_sim,
94
- CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '{query_vector}'::vector) ELSE 0 END * {customer_w} as customer_sim,
95
- CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '{query_vector}'::vector) ELSE 0 END * {agent_w} as agent_sim
96
  FROM vector_store_multi_embeddings
97
  WHERE full_embedding IS NOT NULL
98
  OR topic_embedding IS NOT NULL
@@ -103,25 +123,20 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
103
  id,
104
  metadata,
105
  content,
106
- (full_sim + topic_sim + customer_sim + agent_sim) as combined_similarity,
107
- full_sim / {full_w} as full_raw_sim,
108
- topic_sim / {topic_w} as topic_raw_sim,
109
- customer_sim / {customer_w} as customer_raw_sim,
110
- agent_sim / {agent_w} as agent_raw_sim
111
- FROM similarities
112
  ORDER BY combined_similarity DESC
113
- LIMIT {limit}
114
- """
115
 
116
  with conn.cursor() as cur:
117
- print(f"쿼리 μ‹€ν–‰: μžλ°” κ΅¬ν˜„κ³Ό μΌμΉ˜ν•˜λ„λ‘ μˆ˜μ •")
118
  cur.execute(sql)
119
  rows = cur.fetchall()
120
 
121
  print(f"검색 κ²°κ³Ό: 총 {len(rows)}개 데이터 쑰회됨")
122
  if len(rows) > 0:
123
  print(f"첫 번째 κ²°κ³Ό ID: {rows[0][0]}, μœ μ‚¬λ„: {float(rows[0][3])}")
124
- print(f"첫 번째 κ²°κ³Ό μ›μ‹œ μœ μ‚¬λ„ - full: {rows[0][4]}, topic: {rows[0][5]}, customer: {rows[0][6]}, agent: {rows[0][7]}")
125
 
126
  results = []
127
  for row in rows:
@@ -129,12 +144,6 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
129
  metadata_json = row[1]
130
  content = row[2]
131
  similarity_score = float(row[3])
132
- raw_sims = {
133
- "full": None if row[4] is None else float(row[4]),
134
- "topic": None if row[5] is None else float(row[5]),
135
- "customer": None if row[6] is None else float(row[6]),
136
- "agent": None if row[7] is None else float(row[7])
137
- }
138
 
139
  # 메타데이터 νŒŒμ‹±
140
  try:
@@ -145,11 +154,10 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
145
  "similarityScore": similarity_score,
146
  "content": content,
147
  "chatId": get_text_value(metadata, "chatId"),
148
- "topic": get_text_value(metadata, "topic"),
149
- "rawSimilarities": raw_sims
150
  }
151
 
152
- # μ‹œκ°„ ν•„λ“œ λ³€ν™˜ 없이 κ·ΈλŒ€λ‘œ μ‚¬μš©
153
  if "startTime" in metadata and metadata["startTime"] is not None:
154
  result["startTime"] = metadata["startTime"]
155
 
@@ -162,12 +170,17 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
162
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
163
  continue
164
 
165
- if len(results) > 0:
166
- print(f"κ°€μž₯ 높은 μœ μ‚¬λ„ 점수: {results[0]['similarityScore']}")
167
- print(f"μƒμœ„ κ²°κ³Ό μ±—ID: {results[0].get('chatId')}, 주제: {results[0].get('topic', '')[:50]}...")
168
- print(f"μƒμœ„ κ²°κ³Ό μ›μ‹œ μœ μ‚¬λ„: {results[0]['rawSimilarities']}")
169
 
170
- return results
 
 
 
 
 
 
 
171
 
172
  except Exception as e:
173
  print(f"닀쀑 μž„λ² λ”© 검색 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
@@ -184,7 +197,7 @@ def search_similar_chat_by_date(
184
  max_results: int = 100
185
  ) -> List[Dict]:
186
  """
187
- μ§€μ •λœ λ‚ μ§œ λ²”μœ„ λ‚΄μ˜ 닀쀑 μž„λ² λ”© μ±„νŒ… 데이터λ₯Ό κ²€μƒ‰ν•©λ‹ˆλ‹€.
188
 
189
  Args:
190
  query (str): 검색할 쿼리 ν…μŠ€νŠΈ
@@ -197,7 +210,7 @@ def search_similar_chat_by_date(
197
  """
198
  limit = max_results if max_results is not None else 100
199
 
200
- # κ°€μ€‘μΉ˜ μ„€μ •
201
  full_w = DEFAULT_FULL_WEIGHT
202
  topic_w = DEFAULT_TOPIC_WEIGHT
203
  customer_w = DEFAULT_CUSTOMER_WEIGHT
@@ -207,13 +220,14 @@ def search_similar_chat_by_date(
207
  print(f"닀쀑 μž„λ² λ”© λ‚ μ§œ 검색 μ‹œμž‘: 쿼리='{query}', μ‹œμž‘μΌ={start_date}, μ’…λ£ŒμΌ={end_date}, μ΅œλŒ€ κ²°κ³Ό={limit}")
208
 
209
  try:
210
- # λ‚ μ§œ ν•„ν„° νŒŒλΌλ―Έν„° 생성
211
  start_timestamp = None
212
  end_timestamp = None
213
 
214
  if start_date and start_date.strip():
215
  try:
216
- start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
 
217
  start_timestamp = int(start_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
218
  except ValueError as e:
219
  print(f"μ‹œμž‘ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
@@ -221,43 +235,42 @@ def search_similar_chat_by_date(
221
 
222
  if end_date and end_date.strip():
223
  try:
224
- # μ’…λ£ŒμΌμ˜ 23:59:59둜 μ„€μ •
225
- end_datetime = datetime.strptime(end_date + ' 23:59:59', '%Y-%m-%d %H:%M:%S')
226
  end_timestamp = int(end_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
227
  except ValueError as e:
228
  print(f"μ’…λ£Œ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
229
  return []
230
 
231
- # 쿼리 μž„λ² λ”© 생성 - μ •κ·œν™” 제거
232
  query_embedding = get_embedding(query)
233
- print(f"λ‚ μ§œ 검색 - μž„λ² λ”© 생성 μ™„λ£Œ: 첫 5개 μš”μ†Œ: {query_embedding[:5]}")
234
 
235
- # Java 방식: 벑터λ₯Ό λ¬Έμžμ—΄λ‘œ λ³€ν™˜
236
  query_vector = format_vector_for_pg(query_embedding)
237
 
238
  # DB μ—°κ²°
239
  conn = get_db_conn()
240
  register_vector(conn)
241
 
242
- # Java 방식: λ¬Έμžμ—΄ ν¬λ§·νŒ… μ‚¬μš©ν•œ SQL 쿼리 μ‹œμž‘
243
- sql = f"""
244
  WITH embeddings AS (
245
  SELECT
246
  id,
247
  metadata,
248
  content,
249
- CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '{query_vector}'::vector) ELSE 0 END * {full_w} as full_sim,
250
- CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '{query_vector}'::vector) ELSE 0 END * {topic_w} as topic_sim,
251
- CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '{query_vector}'::vector) ELSE 0 END * {customer_w} as customer_sim,
252
- CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '{query_vector}'::vector) ELSE 0 END * {agent_w} as agent_sim
253
  FROM vector_store_multi_embeddings
254
  WHERE full_embedding IS NOT NULL
255
  OR topic_embedding IS NOT NULL
256
  OR customer_embedding IS NOT NULL
257
  OR agent_embedding IS NOT NULL
258
- """
259
 
260
- # λ‚ μ§œ ν•„ν„° μΆ”κ°€
261
  if start_timestamp is not None:
262
  sql += f" AND (metadata->>'startTime')::bigint >= {start_timestamp}"
263
 
@@ -278,7 +291,7 @@ def search_similar_chat_by_date(
278
 
279
  with conn.cursor() as cur:
280
  print(f"λ‚ μ§œ 검색 쿼리 μ‹€ν–‰: μ‹œμž‘μΌ={start_date}({start_timestamp}), μ’…λ£ŒμΌ={end_date}({end_timestamp})")
281
- # μ—¬κΈ°μ„œλŠ” limit만 λ§€κ°œλ³€μˆ˜λ‘œ 전달
282
  cur.execute(sql, (limit,))
283
  rows = cur.fetchall()
284
 
@@ -305,7 +318,7 @@ def search_similar_chat_by_date(
305
  "topic": get_text_value(metadata, "topic")
306
  }
307
 
308
- # μ‹œκ°„ ν•„λ“œ λ³€ν™˜ 없이 κ·ΈλŒ€λ‘œ μ‚¬μš©
309
  if "startTime" in metadata and metadata["startTime"] is not None:
310
  result["startTime"] = metadata["startTime"]
311
 
@@ -318,13 +331,15 @@ def search_similar_chat_by_date(
318
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
319
  continue
320
 
321
- # μž„κ³„κ°’ 필터링
322
  filtered_results = [r for r in results if r["similarityScore"] >= threshold]
323
- print(f"λ‚ μ§œ 검색 - μž„κ³„κ°’({threshold}) 이상 κ²°κ³Ό: {len(filtered_results)}개 / 전체 {len(results)}개")
324
 
325
  if len(filtered_results) > 0:
 
326
  print(f"λ‚ μ§œ 검색 - κ°€μž₯ 높은 μœ μ‚¬λ„ 점수: {filtered_results[0]['similarityScore']}")
327
  print(f"λ‚ μ§œ 검색 - μƒμœ„ κ²°κ³Ό μ±—ID: {filtered_results[0].get('chatId')}, μ‹œμž‘μ‹œκ°„: {filtered_results[0].get('startTime')}")
 
 
328
 
329
  return filtered_results
330
 
@@ -336,11 +351,11 @@ def search_similar_chat_by_date(
336
  if 'conn' in locals():
337
  conn.close()
338
 
339
- # Gradio Blocks에 ν•¨μˆ˜ 등둝
340
  with gr.Blocks() as demo:
341
- gr.Markdown("# Chat Analysis Search")
342
  gr.Interface(fn=search_similar_chat, inputs=["text", "number"], outputs="json", api_name="search_similar_chat")
343
  gr.Interface(fn=search_similar_chat_by_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_similar_chat_by_date")
344
 
345
  if __name__ == "__main__":
346
- demo.launch(mcp_server=True)
 
6
  from typing import List, Dict, Tuple, Any
7
  from pgvector.psycopg2 import register_vector
8
  import numpy as np
9
+ from datetime import datetime, timezone
10
 
11
  # κ°€μ€‘μΉ˜ 및 μž„κ³„κ°’ μ„€μ •
12
  DEFAULT_FULL_WEIGHT = 0.2
 
15
  DEFAULT_AGENT_WEIGHT = 0.1
16
  DEFAULT_SIMILARITY_THRESHOLD = 0
17
 
18
+ # OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™”
19
+ client = OpenAI()
20
+
21
  # DB μ—°κ²° μ„€μ •
22
  def get_db_conn():
23
+ """PostgreSQL λ°μ΄ν„°λ² μ΄μŠ€μ— μ—°κ²°ν•©λ‹ˆλ‹€."""
24
  return psycopg2.connect(
25
  host=os.environ["VECTOR_HOST"],
26
  port=5432,
 
29
  password=os.environ["VECTOR_SECRET"]
30
  )
31
 
 
 
32
  def get_embedding(text: str) -> List[float]:
33
+ """
34
+ ν…μŠ€νŠΈλ₯Ό OpenAI의 text-embedding-3-small λͺ¨λΈμ„ μ‚¬μš©ν•˜μ—¬ μž„λ² λ”© λ²‘ν„°λ‘œ λ³€ν™˜ν•©λ‹ˆλ‹€.
35
+
36
+ Args:
37
+ text (str): μž„λ² λ”©ν•  ν…μŠ€νŠΈ
38
+
39
+ Returns:
40
+ List[float]: μž„λ² λ”© 벑터
41
+ """
42
+ try:
43
+ response = client.embeddings.create(
44
+ input=text,
45
+ model="text-embedding-3-small",
46
+ encoding_format="float", # λͺ…μ‹œμ μœΌλ‘œ float ν˜•μ‹ μ§€μ •
47
+ dimensions=1536 # 차원 수 λͺ…μ‹œ
48
+ )
49
+ return response.data[0].embedding
50
+ except Exception as e:
51
+ print(f"μž„λ² λ”© 생성 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
52
+ raise
53
 
54
  def format_vector_for_pg(vector: List[float]) -> str:
55
+ """
56
+ μž„λ² λ”© 벑터λ₯Ό PostgreSQL 포맷으둜 λ³€ν™˜ν•©λ‹ˆλ‹€.
57
+ μžλ°”μ˜ formatVectorForPg() λ©”μ†Œλ“œμ™€ λ™μΌν•œ κΈ°λŠ₯μž…λ‹ˆλ‹€.
58
+ """
59
+ # μžλ°” κ΅¬ν˜„κ³Ό λ™μΌν•˜κ²Œ StringBuilder λ°©μ‹μœΌλ‘œ κ΅¬ν˜„
60
  vector_str = ','.join([f"{x}" for x in vector])
61
  return f"[{vector_str}]"
62
 
63
def get_text_value(node: Dict, field_name: str) -> Any:
    """Safely read a field from a parsed metadata dictionary.

    Per the original note, this mirrors the Java ``getTextValue()``
    counterpart.

    Args:
        node: The metadata mapping; may be None or empty.
        field_name: The key to look up.

    Returns:
        The stored value, or None when ``node`` is not a dictionary, the key
        is absent, or the stored value is None. The value is returned as-is,
        so it is not guaranteed to be a string.
    """
    # A non-dict node (e.g. metadata that parsed to a JSON string) has no
    # fields to read; treat it like missing data instead of raising.
    if not isinstance(node, dict):
        return None
    return node.get(field_name)
71
+
72
  def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
73
  """
74
+ μ±„νŒ… λ°μ΄ν„°μ—μ„œ μœ μ‚¬ν•œ μ½˜ν…μΈ λ₯Ό κ²€μƒ‰ν•©λ‹ˆλ‹€.
75
 
76
  Args:
77
  query (str): 검색할 쿼리 ν…μŠ€νŠΈ
 
82
  """
83
  limit = max_results if max_results is not None else 100
84
 
85
+ # μžλ°”μ™€ λ™μΌν•œ κ°€μ€‘μΉ˜ μ„€μ •
86
  full_w = DEFAULT_FULL_WEIGHT
87
  topic_w = DEFAULT_TOPIC_WEIGHT
88
  customer_w = DEFAULT_CUSTOMER_WEIGHT
 
92
  print(f"닀쀑 μž„λ² λ”© 검색 μ‹œμž‘: 쿼리='{query}', κ°€μ€‘μΉ˜=(full={full_w}, topic={topic_w}, customer={customer_w}, agent={agent_w}), μ΅œλŒ€ κ²°κ³Ό={limit}")
93
 
94
  try:
95
+ # 쿼리 μž„λ² λ”© 생성
96
  query_embedding = get_embedding(query)
 
97
 
98
+ # PostgreSQL 포맷으둜 벑터 λ³€ν™˜
99
  query_vector = format_vector_for_pg(query_embedding)
100
 
101
  # DB μ—°κ²°
102
  conn = get_db_conn()
103
  register_vector(conn)
104
 
105
+ # μžλ°” μ½”λ“œμ™€ λ™μΌν•œ SQL 쿼리 κ΅¬ν˜„
106
+ sql = """
107
+ WITH embeddings AS (
108
  SELECT
109
  id,
110
  metadata,
111
  content,
112
+ CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '%s'::vector) ELSE 0 END * %f as full_sim,
113
+ CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '%s'::vector) ELSE 0 END * %f as topic_sim,
114
+ CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '%s'::vector) ELSE 0 END * %f as customer_sim,
115
+ CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '%s'::vector) ELSE 0 END * %f as agent_sim
116
  FROM vector_store_multi_embeddings
117
  WHERE full_embedding IS NOT NULL
118
  OR topic_embedding IS NOT NULL
 
123
  id,
124
  metadata,
125
  content,
126
+ (full_sim + topic_sim + customer_sim + agent_sim) as combined_similarity
127
+ FROM embeddings
 
 
 
 
128
  ORDER BY combined_similarity DESC
129
+ LIMIT %s
130
+ """ % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w, limit)
131
 
132
  with conn.cursor() as cur:
133
+ print(f"쿼리 μ‹€ν–‰: μžλ°” κ΅¬ν˜„κ³Ό λ™μΌν•˜κ²Œ μˆ˜μ •")
134
  cur.execute(sql)
135
  rows = cur.fetchall()
136
 
137
  print(f"검색 κ²°κ³Ό: 총 {len(rows)}개 데이터 쑰회됨")
138
  if len(rows) > 0:
139
  print(f"첫 번째 κ²°κ³Ό ID: {rows[0][0]}, μœ μ‚¬λ„: {float(rows[0][3])}")
 
140
 
141
  results = []
142
  for row in rows:
 
144
  metadata_json = row[1]
145
  content = row[2]
146
  similarity_score = float(row[3])
 
 
 
 
 
 
147
 
148
  # 메타데이터 νŒŒμ‹±
149
  try:
 
154
  "similarityScore": similarity_score,
155
  "content": content,
156
  "chatId": get_text_value(metadata, "chatId"),
157
+ "topic": get_text_value(metadata, "topic")
 
158
  }
159
 
160
+ # μ‹œκ°„ ν•„λ“œ λ³€ν™˜ 없이 κ·ΈλŒ€λ‘œ μ‚¬μš© (이미 KST둜 μ €μž₯λ˜μ–΄ 있음)
161
  if "startTime" in metadata and metadata["startTime"] is not None:
162
  result["startTime"] = metadata["startTime"]
163
 
 
170
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
171
  continue
172
 
173
+ # μž„κ³„κ°’ 필터링 (μžλ°” μ½”λ“œμ™€ λ™μΌν•˜κ²Œ κ΅¬ν˜„)
174
+ filtered_results = [r for r in results if r["similarityScore"] >= threshold]
 
 
175
 
176
+ if len(filtered_results) > 0:
177
+ print(f"μž„κ³„κ°’({threshold}) 이상 κ²°κ³Ό: {len(filtered_results)}개 / 전체 {len(results)}개")
178
+ print(f"κ°€μž₯ 높은 μœ μ‚¬λ„ 점수: {filtered_results[0]['similarityScore']}")
179
+ print(f"μƒμœ„ κ²°κ³Ό μ±—ID: {filtered_results[0].get('chatId')}, 주제: {filtered_results[0].get('topic', '')[:50]}...")
180
+ else:
181
+ print(f"μž„κ³„κ°’({threshold}) μ΄μƒμ˜ κ²°κ³ΌοΏ½οΏ½ μ—†μŠ΅λ‹ˆλ‹€")
182
+
183
+ return filtered_results
184
 
185
  except Exception as e:
186
  print(f"닀쀑 μž„λ² λ”© 검색 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
 
197
  max_results: int = 100
198
  ) -> List[Dict]:
199
  """
200
+ μ§€μ •λœ λ‚ μ§œ λ²”μœ„ λ‚΄μ˜ μ±„νŒ… 데이터λ₯Ό κ²€μƒ‰ν•©λ‹ˆλ‹€.
201
 
202
  Args:
203
  query (str): 검색할 쿼리 ν…μŠ€νŠΈ
 
210
  """
211
  limit = max_results if max_results is not None else 100
212
 
213
+ # μžλ°”μ™€ λ™μΌν•œ κ°€μ€‘μΉ˜ μ„€μ •
214
  full_w = DEFAULT_FULL_WEIGHT
215
  topic_w = DEFAULT_TOPIC_WEIGHT
216
  customer_w = DEFAULT_CUSTOMER_WEIGHT
 
220
  print(f"닀쀑 μž„λ² λ”© λ‚ μ§œ 검색 μ‹œμž‘: 쿼리='{query}', μ‹œμž‘μΌ={start_date}, μ’…λ£ŒμΌ={end_date}, μ΅œλŒ€ κ²°κ³Ό={limit}")
221
 
222
  try:
223
+ # λ‚ μ§œ ν•„ν„° νŒŒλΌλ―Έν„° 생성 (μžλ°” μ½”λ“œμ™€ λ™μΌν•˜κ²Œ κ΅¬ν˜„)
224
  start_timestamp = None
225
  end_timestamp = None
226
 
227
  if start_date and start_date.strip():
228
  try:
229
+ # μžλ°”μ—μ„œλŠ” LocalDateTime.parse() μ‚¬μš©ν•˜λ―€λ‘œ λ™μΌν•˜κ²Œ κ΅¬ν˜„
230
+ start_datetime = datetime.strptime(start_date + "T00:00:00", '%Y-%m-%dT%H:%M:%S')
231
  start_timestamp = int(start_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
232
  except ValueError as e:
233
  print(f"μ‹œμž‘ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
 
235
 
236
  if end_date and end_date.strip():
237
  try:
238
+ # μžλ°”μ—μ„œλŠ” LocalDateTime.parse() μ‚¬μš©ν•˜λ―€λ‘œ λ™μΌν•˜κ²Œ κ΅¬ν˜„
239
+ end_datetime = datetime.strptime(end_date + "T23:59:59", '%Y-%m-%dT%H:%M:%S')
240
  end_timestamp = int(end_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
241
  except ValueError as e:
242
  print(f"μ’…λ£Œ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
243
  return []
244
 
245
+ # 쿼리 μž„λ² λ”© 생성
246
  query_embedding = get_embedding(query)
 
247
 
248
+ # PostgreSQL 포맷으둜 벑터 λ³€ν™˜
249
  query_vector = format_vector_for_pg(query_embedding)
250
 
251
  # DB μ—°κ²°
252
  conn = get_db_conn()
253
  register_vector(conn)
254
 
255
+ # μžλ°” μ½”λ“œμ™€ λ™μΌν•œ SQL 쿼리 μ‹œμž‘
256
+ sql = """
257
  WITH embeddings AS (
258
  SELECT
259
  id,
260
  metadata,
261
  content,
262
+ CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '%s'::vector) ELSE 0 END * %f as full_sim,
263
+ CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '%s'::vector) ELSE 0 END * %f as topic_sim,
264
+ CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '%s'::vector) ELSE 0 END * %f as customer_sim,
265
+ CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '%s'::vector) ELSE 0 END * %f as agent_sim
266
  FROM vector_store_multi_embeddings
267
  WHERE full_embedding IS NOT NULL
268
  OR topic_embedding IS NOT NULL
269
  OR customer_embedding IS NOT NULL
270
  OR agent_embedding IS NOT NULL
271
+ """ % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w)
272
 
273
+ # λ‚ μ§œ ν•„ν„° μΆ”κ°€ (μžλ°” μ½”λ“œμ™€ λ™μΌν•˜κ²Œ κ΅¬ν˜„)
274
  if start_timestamp is not None:
275
  sql += f" AND (metadata->>'startTime')::bigint >= {start_timestamp}"
276
 
 
291
 
292
  with conn.cursor() as cur:
293
  print(f"λ‚ μ§œ 검색 쿼리 μ‹€ν–‰: μ‹œμž‘μΌ={start_date}({start_timestamp}), μ’…λ£ŒμΌ={end_date}({end_timestamp})")
294
+ # μ—¬κΈ°μ„œλŠ” limitλ₯Ό νŒŒλΌλ―Έν„°λ‘œ 전달
295
  cur.execute(sql, (limit,))
296
  rows = cur.fetchall()
297
 
 
318
  "topic": get_text_value(metadata, "topic")
319
  }
320
 
321
+ # μ‹œκ°„ ν•„λ“œ λ³€ν™˜ 없이 κ·ΈλŒ€λ‘œ μ‚¬μš© (이미 KST둜 μ €μž₯λ˜μ–΄ 있음)
322
  if "startTime" in metadata and metadata["startTime"] is not None:
323
  result["startTime"] = metadata["startTime"]
324
 
 
331
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
332
  continue
333
 
334
+ # μž„κ³„κ°’ 필터링 (μžλ°” μ½”λ“œμ™€ λ™μΌν•˜κ²Œ κ΅¬ν˜„)
335
  filtered_results = [r for r in results if r["similarityScore"] >= threshold]
 
336
 
337
  if len(filtered_results) > 0:
338
+ print(f"λ‚ μ§œ 검색 - μž„κ³„κ°’({threshold}) 이상 κ²°κ³Ό: {len(filtered_results)}개 / 전체 {len(results)}개")
339
  print(f"λ‚ μ§œ 검색 - κ°€μž₯ 높은 μœ μ‚¬λ„ 점수: {filtered_results[0]['similarityScore']}")
340
  print(f"λ‚ μ§œ 검색 - μƒμœ„ κ²°κ³Ό μ±—ID: {filtered_results[0].get('chatId')}, μ‹œμž‘μ‹œκ°„: {filtered_results[0].get('startTime')}")
341
+ else:
342
+ print(f"λ‚ μ§œ 검색 - μž„κ³„κ°’({threshold}) μ΄μƒμ˜ κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€")
343
 
344
  return filtered_results
345
 
 
351
  if 'conn' in locals():
352
  conn.close()
353
 
354
# Gradio web UI: expose both search functions as JSON-returning endpoints.
with gr.Blocks() as demo:
    gr.Markdown("# μ±„νŒ… 뢄석 검색")

    # Similarity search: (query, max_results).
    gr.Interface(
        fn=search_similar_chat,
        inputs=["text", "number"],
        outputs="json",
        api_name="search_similar_chat",
    )

    # Date-bounded search: (query, start_date, end_date, max_results).
    gr.Interface(
        fn=search_similar_chat_by_date,
        inputs=["text", "text", "text", "number"],
        outputs="json",
        api_name="search_similar_chat_by_date",
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)