Jake-seong committed on
Commit
7e64a83
·
verified ·
1 Parent(s): 269f105

유사도 정규화 처리

Browse files
Files changed (1) hide show
  1. app.py +66 -67
app.py CHANGED
@@ -18,19 +18,17 @@ def get_db_conn():
18
  password=os.environ["VECTOR_SECRET"]
19
  )
20
 
21
- client = OpenAI() # 환경변수에 OPENAI_API_KEY가 있으면 자동 인식
22
 
23
  def get_embedding(text: str) -> List[float]:
24
- """
25
- 텍스트를 임베딩 벡터로 변환합니다.
26
- """
27
  response = client.embeddings.create(
28
  input=text,
29
  model="text-embedding-3-small"
30
  )
31
  return response.data[0].embedding
32
 
33
- def search_similar_chats(query: str, maxResults: int = 10000) -> List[Dict]:
34
  """
35
  유사한 채팅 문서를 검색합니다.
36
  Args:
@@ -39,33 +37,38 @@ def search_similar_chats(query: str, maxResults: int = 10000) -> List[Dict]:
39
  Returns:
40
  List[Dict]: 검색 결과 목록
41
  """
42
- embedding = np.array(get_embedding(query)) # numpy array로 변환
43
  conn = get_db_conn()
44
- register_vector(conn) # 벡터 타입 자동 변환 지원
45
- with conn.cursor() as cur:
46
- cur.execute("""
47
- SELECT id, metadata, content, embedding <#> %s AS distance
48
- FROM vector_store
49
- ORDER BY embedding <#> %s
50
- LIMIT %s
51
- """, (embedding, embedding, maxResults))
52
- rows = cur.fetchall()
53
- conn.close()
54
- return [
55
- {
 
 
 
56
  "id": row[0],
57
  "metadata": row[1],
58
  "content": row[2],
59
- "distance": row[3]
60
- }
61
- for row in rows
62
- ]
 
 
63
 
64
  def search_similar_chats_by_date(
65
  query: str,
66
  startDate: str = None,
67
  endDate: str = None,
68
- maxResults: int = 10000
69
  ) -> List[Dict]:
70
  """
71
  지정된 날짜 범위에 해당하는 유사한 채팅 문서를 검색합니다.
@@ -78,54 +81,50 @@ def search_similar_chats_by_date(
78
  Returns:
79
  List[Dict]: 검색 결과 목록
80
  """
81
- if startDate not in (None, "") and endDate not in (None, ""):
82
- try:
83
- start_dt = datetime.strptime(startDate, "%Y-%m-%d")
84
- end_dt = datetime.strptime(endDate, "%Y-%m-%d")
85
- except Exception as e:
86
- raise ValueError(f"날짜 형식이 올바르지 않습니다.: {e}")
87
-
88
- embedding = np.array(get_embedding(query)) # numpy array로 변환
89
  conn = get_db_conn()
90
- register_vector(conn) # 벡터 타입 자동 변환 지원
91
-
92
- # SQL 쿼리 구성
93
- sql_query = """
94
- SELECT id, metadata, content, embedding <#> %s AS distance
95
- FROM vector_store
96
- WHERE (metadata->>'startTime') IS NOT NULL
97
- AND (metadata->>'startTime') <> ''
98
- """
99
-
100
- params = [embedding]
101
-
102
- # 날짜 필터 추가
103
- if startDate not in (None, ""):
104
- sql_query += " AND (metadata->>'startTime')::timestamp >= %s"
105
- params.append(startDate)
106
-
107
- if endDate not in (None, ""):
108
- sql_query += " AND (metadata->>'startTime')::timestamp <= %s"
109
- params.append(endDate)
110
-
111
- # 벡터 거리로 정렬하고 결과 제한
112
- sql_query += " ORDER BY embedding <#> %s LIMIT %s"
113
- params.extend([embedding, maxResults])
114
-
115
- with conn.cursor() as cur:
116
- cur.execute(sql_query, tuple(params))
117
- rows = cur.fetchall()
118
- conn.close()
119
 
120
- return [
121
- {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  "id": row[0],
123
  "metadata": row[1],
124
  "content": row[2],
125
- "distance": row[3]
126
- }
127
- for row in rows
128
- ]
 
 
129
 
130
  # Gradio Blocks에 함수 등록
131
  with gr.Blocks() as demo:
@@ -134,4 +133,4 @@ with gr.Blocks() as demo:
134
  gr.Interface(fn=search_similar_chats_by_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_similar_chats_by_date")
135
 
136
  if __name__ == "__main__":
137
- demo.launch(mcp_server=True)
 
18
  password=os.environ["VECTOR_SECRET"]
19
  )
20
 
21
# Module-level OpenAI client shared by every embedding request in this file.
client = OpenAI()


def get_embedding(text: str) -> List[float]:
    """Convert ``text`` into an embedding vector.

    The text is sent to the embeddings endpoint of the module-level
    ``client`` using the ``text-embedding-3-small`` model, and the embedding
    of the first returned item is handed back.

    Args:
        text: The text to embed.

    Returns:
        The embedding of the first item in the response.
    """
    request_kwargs = {"model": "text-embedding-3-small", "input": text}
    first_item = client.embeddings.create(**request_kwargs).data[0]
    return first_item.embedding
30
 
31
def _fetch_similar_rows(embedding, max_results: int, filters=()) -> List[Dict]:
    """Run the cosine-similarity query shared by both search functions.

    Args:
        embedding: Query vector passed to the ``<=>`` operator.
        max_results: Value bound to the ``LIMIT`` clause.
        filters: Iterable of ``(sql_condition, parameter)`` pairs that are
            AND-ed together in the ``WHERE`` clause. Each condition must
            contain exactly one ``%s`` placeholder.

    Returns:
        One dict per row with the keys ``id``, ``metadata``, ``content`` and
        ``similarity`` (``1 - cosine distance``), most similar first.

    Raises:
        RuntimeError: If executing the query or converting the rows fails.
    """
    filters = list(filters)
    where_sql = ""
    if filters:
        where_sql = " WHERE " + " AND ".join(cond for cond, _ in filters)

    # Order by the raw distance operator (ascending) rather than by the
    # derived ``similarity`` expression: the resulting order is the same, but
    # pgvector can only use an ANN index when ORDER BY is the bare operator.
    sql = (
        "SELECT id, metadata, content, 1 - (embedding <=> %s) AS similarity"
        " FROM vector_store"
        + where_sql
        + " ORDER BY embedding <=> %s LIMIT %s"
    )
    params = [embedding]
    params.extend(value for _, value in filters)
    params.extend([embedding, max_results])

    conn = get_db_conn()
    try:
        # Inside the try so the connection is closed even if registering the
        # vector type adapter fails.
        register_vector(conn)
        try:
            with conn.cursor() as cur:
                cur.execute(sql, tuple(params))
                rows = cur.fetchall()
            return [
                {
                    "id": row[0],
                    "metadata": row[1],
                    "content": row[2],
                    "similarity": float(row[3]),
                }
                for row in rows
            ]
        except Exception as e:
            # Keep the original exception as __cause__ for debugging.
            raise RuntimeError(f"DB 검색 오류: {str(e)}") from e
    finally:
        conn.close()


def search_similar_chats(query: str, maxResults: int = 200) -> List[Dict]:
    """Search for chat documents similar to ``query``.

    Args:
        query: Text to search for; it is embedded with ``get_embedding``.
        maxResults: Maximum number of rows to return.

    Returns:
        List[Dict]: Search results, each with ``id``, ``metadata``,
        ``content`` and ``similarity``, ordered from most to least similar.

    Raises:
        RuntimeError: If the database query fails.
    """
    embedding = np.array(get_embedding(query))
    return _fetch_similar_rows(embedding, maxResults)


def search_similar_chats_by_date(
    query: str,
    startDate: str = None,
    endDate: str = None,
    maxResults: int = 200
) -> List[Dict]:
    """Search for similar chat documents within an optional date range.

    Args:
        query: Text to search for; it is embedded with ``get_embedding``.
        startDate: Inclusive lower bound in ``YYYY-MM-DD`` format. ``None``
            or an empty string disables the lower bound.
        endDate: Inclusive upper bound in ``YYYY-MM-DD`` format. ``None``
            or an empty string disables the upper bound.
        maxResults: Maximum number of rows to return.

    Returns:
        List[Dict]: Search results, each with ``id``, ``metadata``,
        ``content`` and ``similarity``, ordered from most to least similar.

    Raises:
        ValueError: If ``startDate`` or ``endDate`` is not ``YYYY-MM-DD``.
        RuntimeError: If the database query fails.
    """
    # Validate the dates before doing any network or database work.
    try:
        start_day = datetime.strptime(startDate, "%Y-%m-%d").date() if startDate else None
        end_day = datetime.strptime(endDate, "%Y-%m-%d").date() if endDate else None
    except ValueError as e:
        raise ValueError(f"날짜 형식 오류: {e}") from e

    # NULLIF turns an empty 'startTime' into NULL so the ::date cast cannot
    # fail on such rows; they simply do not match a date filter.
    start_time_as_date = "NULLIF(metadata->>'startTime', '')::date"
    filters = []
    if start_day is not None:
        filters.append((start_time_as_date + " >= %s", start_day))
    if end_day is not None:
        filters.append((start_time_as_date + " <= %s", end_day))

    embedding = np.array(get_embedding(query))
    return _fetch_similar_rows(embedding, maxResults, filters)
128
 
129
  # Gradio Blocks에 함수 등록
130
  with gr.Blocks() as demo:
 
133
  gr.Interface(fn=search_similar_chats_by_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_similar_chats_by_date")
134
 
135
  if __name__ == "__main__":
136
+ demo.launch(mcp_server=True)