Jake-seong committed on
Commit
a9e3ab0
·
verified ·
1 Parent(s): 41d679b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -22
app.py CHANGED
@@ -79,49 +79,95 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
79
  conn = get_db_conn()
80
  register_vector(conn)
81
 
82
- # Java 방식: 문자열 포맷팅 사용한 SQL 쿼리
83
  sql = f"""
84
  WITH embeddings AS (
85
  SELECT
86
  id,
87
  metadata,
88
  content,
89
- CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> '{query_vector}'::vector) ELSE 0 END * {full_w} as full_sim,
90
- CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> '{query_vector}'::vector) ELSE 0 END * {topic_w} as topic_sim,
91
- CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> '{query_vector}'::vector) ELSE 0 END * {customer_w} as customer_sim,
92
- CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> '{query_vector}'::vector) ELSE 0 END * {agent_w} as agent_sim
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  FROM vector_store_multi_embeddings
94
- WHERE full_embedding IS NOT NULL
95
- OR topic_embedding IS NOT NULL
96
- OR customer_embedding IS NOT NULL
97
- OR agent_embedding IS NOT NULL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
  SELECT
100
  id,
101
  metadata,
102
  content,
103
- (full_sim + topic_sim + customer_sim + agent_sim) as combined_similarity
104
- FROM embeddings
 
 
 
 
 
105
  ORDER BY combined_similarity DESC
106
  LIMIT {limit}
107
  """
108
 
109
  with conn.cursor() as cur:
110
- print(f"์ฟผ๋ฆฌ ์‹คํ–‰ - Java ๋ฐฉ์‹ ํฌ๋งทํŒ…, ๊ฐ€์ค‘์น˜ ์„ค์ •={full_w}, {topic_w}, {customer_w}, {agent_w}, ๊ฒฐ๊ณผ ์ œํ•œ={limit}")
111
  # Java 방식: 매개변수 없이 직접 쿼리 실행
112
  cur.execute(sql)
113
  rows = cur.fetchall()
114
 
115
  print(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: ์ด {len(rows)}๊ฐœ ๋ฐ์ดํ„ฐ ์กฐํšŒ๋จ")
116
  if len(rows) > 0:
117
- print(f"์ฒซ ๋ฒˆ์งธ ๊ฒฐ๊ณผ ID: {rows[0][0]}, ์œ ์‚ฌ๋„: {float(rows[0][3])}")
 
118
 
119
  results = []
120
  for row in rows:
121
  id_val = row[0]
122
  metadata_json = row[1]
123
  content = row[2]
124
- similarity_score = float(row[3])
 
 
 
 
125
 
126
  # 메타데이터 파싱
127
  try:
@@ -132,7 +178,13 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
132
  "similarityScore": similarity_score,
133
  "content": content,
134
  "chatId": get_text_value(metadata, "chatId"),
135
- "topic": get_text_value(metadata, "topic")
 
 
 
 
 
 
136
  }
137
 
138
  # 시간 필드 변환 없이 그대로 사용
@@ -148,15 +200,14 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
148
  print(f"๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {metadata_json[:200]}...")
149
  continue
150
 
151
- # 임계값 필터링
152
- filtered_results = [r for r in results if r["similarityScore"] >= threshold]
153
- print(f"์ž„๊ณ„๊ฐ’({threshold}) ์ด์ƒ ๊ฒฐ๊ณผ: {len(filtered_results)}๊ฐœ / ์ „์ฒด {len(results)}๊ฐœ")
154
 
155
- if len(filtered_results) > 0:
156
- print(f"๊ฐ€์žฅ ๋†’์€ ์œ ์‚ฌ๋„ ์ ์ˆ˜: {filtered_results[0]['similarityScore']}")
157
- print(f"์ƒ์œ„ ๊ฒฐ๊ณผ ์ฑ—ID: {filtered_results[0].get('chatId')}, ์ฃผ์ œ: {filtered_results[0].get('topic', '')[:50]}...")
 
158
 
159
- return filtered_results
160
 
161
  except Exception as e:
162
  print(f"๋‹ค์ค‘ ์ž„๋ฒ ๋”ฉ ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
 
79
  conn = get_db_conn()
80
  register_vector(conn)
81
 
82
+ # 개선된 SQL 쿼리 - 가중 평균 방식으로 계산하고 각 임베딩 유형별 점수 포함
83
  sql = f"""
84
  WITH embeddings AS (
85
  SELECT
86
  id,
87
  metadata,
88
  content,
89
+ -- ๊ฐ ์ž„๋ฒ ๋”ฉ ์œ ํ˜•๋ณ„ ์œ ์‚ฌ๋„ ๊ณ„์‚ฐ (NULL์ด ์•„๋‹Œ ๊ฒฝ์šฐ๋งŒ)
90
+ CASE WHEN full_embedding IS NOT NULL
91
+ THEN 1 - (full_embedding <=> '{query_vector}'::vector)
92
+ ELSE NULL END as full_sim,
93
+
94
+ CASE WHEN topic_embedding IS NOT NULL
95
+ THEN 1 - (topic_embedding <=> '{query_vector}'::vector)
96
+ ELSE NULL END as topic_sim,
97
+
98
+ CASE WHEN customer_embedding IS NOT NULL
99
+ THEN 1 - (customer_embedding <=> '{query_vector}'::vector)
100
+ ELSE NULL END as customer_sim,
101
+
102
+ CASE WHEN agent_embedding IS NOT NULL
103
+ THEN 1 - (agent_embedding <=> '{query_vector}'::vector)
104
+ ELSE NULL END as agent_sim,
105
+
106
+ -- 유효한 임베딩 카운트 (0으로 나누기 방지)
107
+ (CASE WHEN full_embedding IS NOT NULL THEN 1 ELSE 0 END +
108
+ CASE WHEN topic_embedding IS NOT NULL THEN 1 ELSE 0 END +
109
+ CASE WHEN customer_embedding IS NOT NULL THEN 1 ELSE 0 END +
110
+ CASE WHEN agent_embedding IS NOT NULL THEN 1 ELSE 0 END) as valid_count
111
  FROM vector_store_multi_embeddings
112
+ WHERE (full_embedding IS NOT NULL
113
+ OR topic_embedding IS NOT NULL
114
+ OR customer_embedding IS NOT NULL
115
+ OR agent_embedding IS NOT NULL)
116
+ ),
117
+ weighted_scores AS (
118
+ SELECT
119
+ id,
120
+ metadata,
121
+ content,
122
+ full_sim,
123
+ topic_sim,
124
+ customer_sim,
125
+ agent_sim,
126
+ valid_count,
127
+
128
+ -- 가중치를 적용한 총 유사도 점수 계산
129
+ (COALESCE(full_sim, 0) * {full_w} +
130
+ COALESCE(topic_sim, 0) * {topic_w} +
131
+ COALESCE(customer_sim, 0) * {customer_w} +
132
+ COALESCE(agent_sim, 0) * {agent_w}) as weighted_sum
133
+ FROM embeddings
134
  )
135
  SELECT
136
  id,
137
  metadata,
138
  content,
139
+ full_sim,
140
+ topic_sim,
141
+ customer_sim,
142
+ agent_sim,
143
+ weighted_sum as combined_similarity
144
+ FROM weighted_scores
145
+ WHERE weighted_sum >= {threshold}
146
  ORDER BY combined_similarity DESC
147
  LIMIT {limit}
148
  """
149
 
150
  with conn.cursor() as cur:
151
+ print(f"์ฟผ๋ฆฌ ์‹คํ–‰ - ๊ฐœ์„ ๋œ ๋ฐฉ์‹, ๊ฐ€์ค‘์น˜ ์„ค์ •={full_w}, {topic_w}, {customer_w}, {agent_w}, ๊ฒฐ๊ณผ ์ œํ•œ={limit}")
152
  # Java 방식: 매개변수 없이 직접 쿼리 실행
153
  cur.execute(sql)
154
  rows = cur.fetchall()
155
 
156
  print(f"๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: ์ด {len(rows)}๊ฐœ ๋ฐ์ดํ„ฐ ์กฐํšŒ๋จ")
157
  if len(rows) > 0:
158
+ print(f"์ฒซ ๋ฒˆ์งธ ๊ฒฐ๊ณผ ID: {rows[0][0]}, ์œ ์‚ฌ๋„: {float(rows[0][7])}")
159
+ print(f"์ฒซ ๋ฒˆ์งธ ๊ฒฐ๊ณผ ์„ธ๋ถ€ ์œ ์‚ฌ๋„ - full: {rows[0][3]}, topic: {rows[0][4]}, customer: {rows[0][5]}, agent: {rows[0][6]}")
160
 
161
  results = []
162
  for row in rows:
163
  id_val = row[0]
164
  metadata_json = row[1]
165
  content = row[2]
166
+ full_similarity = None if row[3] is None else float(row[3])
167
+ topic_similarity = None if row[4] is None else float(row[4])
168
+ customer_similarity = None if row[5] is None else float(row[5])
169
+ agent_similarity = None if row[6] is None else float(row[6])
170
+ similarity_score = float(row[7])
171
 
172
  # 메타데이터 파싱
173
  try:
 
178
  "similarityScore": similarity_score,
179
  "content": content,
180
  "chatId": get_text_value(metadata, "chatId"),
181
+ "topic": get_text_value(metadata, "topic"),
182
+ "similarityDetails": {
183
+ "full": full_similarity,
184
+ "topic": topic_similarity,
185
+ "customer": customer_similarity,
186
+ "agent": agent_similarity
187
+ }
188
  }
189
 
190
  # 시간 필드 변환 없이 그대로 사용
 
200
  print(f"๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {metadata_json[:200]}...")
201
  continue
202
 
203
+ print(f"์ž„๊ณ„๊ฐ’({threshold}) ์ด์ƒ ๊ฒฐ๊ณผ: {len(results)}๊ฐœ")
 
 
204
 
205
+ if len(results) > 0:
206
+ print(f"๊ฐ€์žฅ ๋†’์€ ์œ ์‚ฌ๋„ ์ ์ˆ˜: {results[0]['similarityScore']}")
207
+ print(f"์ƒ์œ„ ๊ฒฐ๊ณผ ์ฑ—ID: {results[0].get('chatId')}, ์ฃผ์ œ: {results[0].get('topic', '')[:50]}...")
208
+ print(f"์ƒ์œ„ ๊ฒฐ๊ณผ ์„ธ๋ถ€ ์œ ์‚ฌ๋„: {results[0]['similarityDetails']}")
209
 
210
+ return results
211
 
212
  except Exception as e:
213
  print(f"๋‹ค์ค‘ ์ž„๋ฒ ๋”ฉ ๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")