Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,10 +35,6 @@ def get_embedding(text: str) -> List[float]:
|
|
| 35 |
)
|
| 36 |
return response.data[0].embedding
|
| 37 |
|
| 38 |
-
def format_vector_for_pg(vector: List[float]) -> str:
|
| 39 |
-
"""벡터를 PostgreSQL 포맷으로 변환합니다."""
|
| 40 |
-
return f"[{','.join(str(x) for x in vector)}]"
|
| 41 |
-
|
| 42 |
def get_text_value(node, field_name):
|
| 43 |
"""JSON 노드에서 텍스트 값을 안전하게 추출합니다."""
|
| 44 |
if node and field_name in node and node[field_name] is not None:
|
|
@@ -70,23 +66,22 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 70 |
try:
|
| 71 |
# 쿼리 임베딩 생성
|
| 72 |
query_embedding = np.array(get_embedding(query))
|
| 73 |
-
query_vector = format_vector_for_pg(query_embedding)
|
| 74 |
|
| 75 |
# DB 연결
|
| 76 |
conn = get_db_conn()
|
| 77 |
register_vector(conn)
|
| 78 |
|
| 79 |
-
# 여러 필드를 가중치로 조합한 유사도 검색 SQL
|
| 80 |
-
sql =
|
| 81 |
WITH embeddings AS (
|
| 82 |
SELECT
|
| 83 |
id,
|
| 84 |
metadata,
|
| 85 |
content,
|
| 86 |
-
CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=>
|
| 87 |
-
CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=>
|
| 88 |
-
CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=>
|
| 89 |
-
CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=>
|
| 90 |
FROM vector_store_multi_embeddings
|
| 91 |
WHERE full_embedding IS NOT NULL
|
| 92 |
OR topic_embedding IS NOT NULL
|
|
@@ -104,7 +99,15 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 104 |
"""
|
| 105 |
|
| 106 |
with conn.cursor() as cur:
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
rows = cur.fetchall()
|
| 109 |
|
| 110 |
results = []
|
|
@@ -204,23 +207,22 @@ def search_similar_chat_by_date(
|
|
| 204 |
|
| 205 |
# 쿼리 임베딩 생성
|
| 206 |
query_embedding = np.array(get_embedding(query))
|
| 207 |
-
query_vector = format_vector_for_pg(query_embedding)
|
| 208 |
|
| 209 |
# DB 연결
|
| 210 |
conn = get_db_conn()
|
| 211 |
register_vector(conn)
|
| 212 |
|
| 213 |
-
# 여러 필드를 가중치로 조합한 유사도 검색 SQL
|
| 214 |
-
sql =
|
| 215 |
WITH embeddings AS (
|
| 216 |
SELECT
|
| 217 |
id,
|
| 218 |
metadata,
|
| 219 |
content,
|
| 220 |
-
CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=>
|
| 221 |
-
CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=>
|
| 222 |
-
CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=>
|
| 223 |
-
CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=>
|
| 224 |
FROM vector_store_multi_embeddings
|
| 225 |
WHERE full_embedding IS NOT NULL
|
| 226 |
OR topic_embedding IS NOT NULL
|
|
@@ -228,15 +230,20 @@ def search_similar_chat_by_date(
|
|
| 228 |
OR agent_embedding IS NOT NULL
|
| 229 |
"""
|
| 230 |
|
| 231 |
-
params = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
# 날짜 필터 추가
|
| 234 |
if start_timestamp is not None:
|
| 235 |
-
sql +=
|
| 236 |
params.append(start_timestamp)
|
| 237 |
|
| 238 |
if end_timestamp is not None:
|
| 239 |
-
sql +=
|
| 240 |
params.append(end_timestamp)
|
| 241 |
|
| 242 |
sql += """
|
|
|
|
| 35 |
)
|
| 36 |
return response.data[0].embedding
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def get_text_value(node, field_name):
|
| 39 |
"""JSON 노드에서 텍스트 값을 안전하게 추출합니다."""
|
| 40 |
if node and field_name in node and node[field_name] is not None:
|
|
|
|
| 66 |
try:
|
| 67 |
# 쿼리 임베딩 생성
|
| 68 |
query_embedding = np.array(get_embedding(query))
|
|
|
|
| 69 |
|
| 70 |
# DB 연결
|
| 71 |
conn = get_db_conn()
|
| 72 |
register_vector(conn)
|
| 73 |
|
| 74 |
+
# 여러 필드를 가중치로 조합한 유사도 검색 SQL - 매개변수화된 쿼리 사용
|
| 75 |
+
sql = """
|
| 76 |
WITH embeddings AS (
|
| 77 |
SELECT
|
| 78 |
id,
|
| 79 |
metadata,
|
| 80 |
content,
|
| 81 |
+
CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> %s::vector) ELSE 0 END * %s as full_sim,
|
| 82 |
+
CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> %s::vector) ELSE 0 END * %s as topic_sim,
|
| 83 |
+
CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> %s::vector) ELSE 0 END * %s as customer_sim,
|
| 84 |
+
CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> %s::vector) ELSE 0 END * %s as agent_sim
|
| 85 |
FROM vector_store_multi_embeddings
|
| 86 |
WHERE full_embedding IS NOT NULL
|
| 87 |
OR topic_embedding IS NOT NULL
|
|
|
|
| 99 |
"""
|
| 100 |
|
| 101 |
with conn.cursor() as cur:
|
| 102 |
+
# 매개변수화된 쿼리 실행
|
| 103 |
+
params = (
|
| 104 |
+
query_embedding, full_w,
|
| 105 |
+
query_embedding, topic_w,
|
| 106 |
+
query_embedding, customer_w,
|
| 107 |
+
query_embedding, agent_w,
|
| 108 |
+
limit
|
| 109 |
+
)
|
| 110 |
+
cur.execute(sql, params)
|
| 111 |
rows = cur.fetchall()
|
| 112 |
|
| 113 |
results = []
|
|
|
|
| 207 |
|
| 208 |
# 쿼리 임베딩 생성
|
| 209 |
query_embedding = np.array(get_embedding(query))
|
|
|
|
| 210 |
|
| 211 |
# DB 연결
|
| 212 |
conn = get_db_conn()
|
| 213 |
register_vector(conn)
|
| 214 |
|
| 215 |
+
# 여러 필드를 가중치로 조합한 유사도 검색 SQL - 매개변수화
|
| 216 |
+
sql = """
|
| 217 |
WITH embeddings AS (
|
| 218 |
SELECT
|
| 219 |
id,
|
| 220 |
metadata,
|
| 221 |
content,
|
| 222 |
+
CASE WHEN full_embedding IS NOT NULL THEN 1 - (full_embedding <=> %s::vector) ELSE 0 END * %s as full_sim,
|
| 223 |
+
CASE WHEN topic_embedding IS NOT NULL THEN 1 - (topic_embedding <=> %s::vector) ELSE 0 END * %s as topic_sim,
|
| 224 |
+
CASE WHEN customer_embedding IS NOT NULL THEN 1 - (customer_embedding <=> %s::vector) ELSE 0 END * %s as customer_sim,
|
| 225 |
+
CASE WHEN agent_embedding IS NOT NULL THEN 1 - (agent_embedding <=> %s::vector) ELSE 0 END * %s as agent_sim
|
| 226 |
FROM vector_store_multi_embeddings
|
| 227 |
WHERE full_embedding IS NOT NULL
|
| 228 |
OR topic_embedding IS NOT NULL
|
|
|
|
| 230 |
OR agent_embedding IS NOT NULL
|
| 231 |
"""
|
| 232 |
|
| 233 |
+
params = [
|
| 234 |
+
query_embedding, full_w,
|
| 235 |
+
query_embedding, topic_w,
|
| 236 |
+
query_embedding, customer_w,
|
| 237 |
+
query_embedding, agent_w
|
| 238 |
+
]
|
| 239 |
|
| 240 |
# 날짜 필터 추가
|
| 241 |
if start_timestamp is not None:
|
| 242 |
+
sql += " AND (metadata->>'startTime')::bigint >= %s"
|
| 243 |
params.append(start_timestamp)
|
| 244 |
|
| 245 |
if end_timestamp is not None:
|
| 246 |
+
sql += " AND (metadata->>'startTime')::bigint <= %s"
|
| 247 |
params.append(end_timestamp)
|
| 248 |
|
| 249 |
sql += """
|