Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -79,49 +79,95 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 79 |
conn = get_db_conn()
|
| 80 |
register_vector(conn)
|
| 81 |
|
| 82 |
-
#
|
| 83 |
sql = f"""
|
| 84 |
WITH embeddings AS (
|
| 85 |
SELECT
|
| 86 |
id,
|
| 87 |
metadata,
|
| 88 |
content,
|
| 89 |
-
|
| 90 |
-
CASE WHEN
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
FROM vector_store_multi_embeddings
|
| 94 |
-
WHERE full_embedding IS NOT NULL
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
)
|
| 99 |
SELECT
|
| 100 |
id,
|
| 101 |
metadata,
|
| 102 |
content,
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
ORDER BY combined_similarity DESC
|
| 106 |
LIMIT {limit}
|
| 107 |
"""
|
| 108 |
|
| 109 |
with conn.cursor() as cur:
|
| 110 |
-
print(f"์ฟผ๋ฆฌ ์คํ -
|
| 111 |
# Java 방식: 매개변수 없이 직접 쿼리 실행
|
| 112 |
cur.execute(sql)
|
| 113 |
rows = cur.fetchall()
|
| 114 |
|
| 115 |
print(f"๊ฒ์ ๊ฒฐ๊ณผ: ์ด {len(rows)}๊ฐ ๋ฐ์ดํฐ ์กฐํ๋จ")
|
| 116 |
if len(rows) > 0:
|
| 117 |
-
print(f"์ฒซ ๋ฒ์งธ ๊ฒฐ๊ณผ ID: {rows[0][0]}, ์ ์ฌ๋: {float(rows[0][
|
|
|
|
| 118 |
|
| 119 |
results = []
|
| 120 |
for row in rows:
|
| 121 |
id_val = row[0]
|
| 122 |
metadata_json = row[1]
|
| 123 |
content = row[2]
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
# 메타데이터 파싱
|
| 127 |
try:
|
|
@@ -132,7 +178,13 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 132 |
"similarityScore": similarity_score,
|
| 133 |
"content": content,
|
| 134 |
"chatId": get_text_value(metadata, "chatId"),
|
| 135 |
-
"topic": get_text_value(metadata, "topic")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
}
|
| 137 |
|
| 138 |
# 시간 필드 변환 없이 그대로 사용
|
|
@@ -148,15 +200,14 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 148 |
print(f"๋ฌธ์ ๊ฐ ๋ฐ์ํ ๋ฉํ๋ฐ์ดํฐ: {metadata_json[:200]}...")
|
| 149 |
continue
|
| 150 |
|
| 151 |
-
|
| 152 |
-
filtered_results = [r for r in results if r["similarityScore"] >= threshold]
|
| 153 |
-
print(f"์๊ณ๊ฐ({threshold}) ์ด์ ๊ฒฐ๊ณผ: {len(filtered_results)}๊ฐ / ์ ์ฒด {len(results)}๊ฐ")
|
| 154 |
|
| 155 |
-
if len(
|
| 156 |
-
print(f"๊ฐ์ฅ ๋์ ์ ์ฌ๋ ์ ์: {
|
| 157 |
-
print(f"์์ ๊ฒฐ๊ณผ ์ฑID: {
|
|
|
|
| 158 |
|
| 159 |
-
return
|
| 160 |
|
| 161 |
except Exception as e:
|
| 162 |
print(f"๋ค์ค ์๋ฒ ๋ฉ ๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|
|
|
|
| 79 |
conn = get_db_conn()
|
| 80 |
register_vector(conn)
|
| 81 |
|
| 82 |
+
# 개선된 SQL 쿼리 - 가중 평균 방식으로 계산하고 각 임베딩 유형별 점수 포함
|
| 83 |
sql = f"""
|
| 84 |
WITH embeddings AS (
|
| 85 |
SELECT
|
| 86 |
id,
|
| 87 |
metadata,
|
| 88 |
content,
|
| 89 |
+
-- ๊ฐ ์๋ฒ ๋ฉ ์ ํ๋ณ ์ ์ฌ๋ ๊ณ์ฐ (NULL์ด ์๋ ๊ฒฝ์ฐ๋ง)
|
| 90 |
+
CASE WHEN full_embedding IS NOT NULL
|
| 91 |
+
THEN 1 - (full_embedding <=> '{query_vector}'::vector)
|
| 92 |
+
ELSE NULL END as full_sim,
|
| 93 |
+
|
| 94 |
+
CASE WHEN topic_embedding IS NOT NULL
|
| 95 |
+
THEN 1 - (topic_embedding <=> '{query_vector}'::vector)
|
| 96 |
+
ELSE NULL END as topic_sim,
|
| 97 |
+
|
| 98 |
+
CASE WHEN customer_embedding IS NOT NULL
|
| 99 |
+
THEN 1 - (customer_embedding <=> '{query_vector}'::vector)
|
| 100 |
+
ELSE NULL END as customer_sim,
|
| 101 |
+
|
| 102 |
+
CASE WHEN agent_embedding IS NOT NULL
|
| 103 |
+
THEN 1 - (agent_embedding <=> '{query_vector}'::vector)
|
| 104 |
+
ELSE NULL END as agent_sim,
|
| 105 |
+
|
| 106 |
+
-- 유효한 임베딩 카운트 (0으로 나누기 방지)
|
| 107 |
+
(CASE WHEN full_embedding IS NOT NULL THEN 1 ELSE 0 END +
|
| 108 |
+
CASE WHEN topic_embedding IS NOT NULL THEN 1 ELSE 0 END +
|
| 109 |
+
CASE WHEN customer_embedding IS NOT NULL THEN 1 ELSE 0 END +
|
| 110 |
+
CASE WHEN agent_embedding IS NOT NULL THEN 1 ELSE 0 END) as valid_count
|
| 111 |
FROM vector_store_multi_embeddings
|
| 112 |
+
WHERE (full_embedding IS NOT NULL
|
| 113 |
+
OR topic_embedding IS NOT NULL
|
| 114 |
+
OR customer_embedding IS NOT NULL
|
| 115 |
+
OR agent_embedding IS NOT NULL)
|
| 116 |
+
),
|
| 117 |
+
weighted_scores AS (
|
| 118 |
+
SELECT
|
| 119 |
+
id,
|
| 120 |
+
metadata,
|
| 121 |
+
content,
|
| 122 |
+
full_sim,
|
| 123 |
+
topic_sim,
|
| 124 |
+
customer_sim,
|
| 125 |
+
agent_sim,
|
| 126 |
+
valid_count,
|
| 127 |
+
|
| 128 |
+
-- 가중치를 적용한 총 유사도 점수 계산
|
| 129 |
+
(COALESCE(full_sim, 0) * {full_w} +
|
| 130 |
+
COALESCE(topic_sim, 0) * {topic_w} +
|
| 131 |
+
COALESCE(customer_sim, 0) * {customer_w} +
|
| 132 |
+
COALESCE(agent_sim, 0) * {agent_w}) as weighted_sum
|
| 133 |
+
FROM embeddings
|
| 134 |
)
|
| 135 |
SELECT
|
| 136 |
id,
|
| 137 |
metadata,
|
| 138 |
content,
|
| 139 |
+
full_sim,
|
| 140 |
+
topic_sim,
|
| 141 |
+
customer_sim,
|
| 142 |
+
agent_sim,
|
| 143 |
+
weighted_sum as combined_similarity
|
| 144 |
+
FROM weighted_scores
|
| 145 |
+
WHERE weighted_sum >= {threshold}
|
| 146 |
ORDER BY combined_similarity DESC
|
| 147 |
LIMIT {limit}
|
| 148 |
"""
|
| 149 |
|
| 150 |
with conn.cursor() as cur:
|
| 151 |
+
print(f"์ฟผ๋ฆฌ ์คํ - ๊ฐ์ ๋ ๋ฐฉ์, ๊ฐ์ค์น ์ค์ ={full_w}, {topic_w}, {customer_w}, {agent_w}, ๊ฒฐ๊ณผ ์ ํ={limit}")
|
| 152 |
# Java 방식: 매개변수 없이 직접 쿼리 실행
|
| 153 |
cur.execute(sql)
|
| 154 |
rows = cur.fetchall()
|
| 155 |
|
| 156 |
print(f"๊ฒ์ ๊ฒฐ๊ณผ: ์ด {len(rows)}๊ฐ ๋ฐ์ดํฐ ์กฐํ๋จ")
|
| 157 |
if len(rows) > 0:
|
| 158 |
+
print(f"์ฒซ ๋ฒ์งธ ๊ฒฐ๊ณผ ID: {rows[0][0]}, ์ ์ฌ๋: {float(rows[0][7])}")
|
| 159 |
+
print(f"์ฒซ ๋ฒ์งธ ๊ฒฐ๊ณผ ์ธ๋ถ ์ ์ฌ๋ - full: {rows[0][3]}, topic: {rows[0][4]}, customer: {rows[0][5]}, agent: {rows[0][6]}")
|
| 160 |
|
| 161 |
results = []
|
| 162 |
for row in rows:
|
| 163 |
id_val = row[0]
|
| 164 |
metadata_json = row[1]
|
| 165 |
content = row[2]
|
| 166 |
+
full_similarity = None if row[3] is None else float(row[3])
|
| 167 |
+
topic_similarity = None if row[4] is None else float(row[4])
|
| 168 |
+
customer_similarity = None if row[5] is None else float(row[5])
|
| 169 |
+
agent_similarity = None if row[6] is None else float(row[6])
|
| 170 |
+
similarity_score = float(row[7])
|
| 171 |
|
| 172 |
# 메타데이터 파싱
|
| 173 |
try:
|
|
|
|
| 178 |
"similarityScore": similarity_score,
|
| 179 |
"content": content,
|
| 180 |
"chatId": get_text_value(metadata, "chatId"),
|
| 181 |
+
"topic": get_text_value(metadata, "topic"),
|
| 182 |
+
"similarityDetails": {
|
| 183 |
+
"full": full_similarity,
|
| 184 |
+
"topic": topic_similarity,
|
| 185 |
+
"customer": customer_similarity,
|
| 186 |
+
"agent": agent_similarity
|
| 187 |
+
}
|
| 188 |
}
|
| 189 |
|
| 190 |
# 시간 필드 변환 없이 그대로 사용
|
|
|
|
| 200 |
print(f"๋ฌธ์ ๊ฐ ๋ฐ์ํ ๋ฉํ๋ฐ์ดํฐ: {metadata_json[:200]}...")
|
| 201 |
continue
|
| 202 |
|
| 203 |
+
print(f"์๊ณ๊ฐ({threshold}) ์ด์ ๊ฒฐ๊ณผ: {len(results)}๊ฐ")
|
|
|
|
|
|
|
| 204 |
|
| 205 |
+
if len(results) > 0:
|
| 206 |
+
print(f"๊ฐ์ฅ ๋์ ์ ์ฌ๋ ์ ์: {results[0]['similarityScore']}")
|
| 207 |
+
print(f"์์ ๊ฒฐ๊ณผ ์ฑID: {results[0].get('chatId')}, ์ฃผ์ : {results[0].get('topic', '')[:50]}...")
|
| 208 |
+
print(f"์์ ๊ฒฐ๊ณผ ์ธ๋ถ ์ ์ฌ๋: {results[0]['similarityDetails']}")
|
| 209 |
|
| 210 |
+
return results
|
| 211 |
|
| 212 |
except Exception as e:
|
| 213 |
print(f"๋ค์ค ์๋ฒ ๋ฉ ๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}")
|