Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import os
|
|
| 6 |
from typing import List, Dict, Tuple, Any
|
| 7 |
from pgvector.psycopg2 import register_vector
|
| 8 |
import numpy as np
|
| 9 |
-
from datetime import datetime, timezone
|
| 10 |
|
| 11 |
# 가중치 및 임계값 설정
|
| 12 |
DEFAULT_FULL_WEIGHT = 0.2
|
|
@@ -43,9 +43,8 @@ def get_embedding(text: str) -> List[float]:
|
|
| 43 |
try:
|
| 44 |
response = client.embeddings.create(
|
| 45 |
input=text,
|
| 46 |
-
model="text-embedding-
|
| 47 |
-
encoding_format="float"
|
| 48 |
-
dimensions=1536
|
| 49 |
)
|
| 50 |
# 명시적으로 float32로 변환하여 Java의 float[]와 호환되게 함
|
| 51 |
return np.array(response.data[0].embedding, dtype=np.float32).tolist()
|
|
@@ -167,7 +166,7 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 167 |
"topic": get_text_value(metadata, "topic")
|
| 168 |
}
|
| 169 |
|
| 170 |
-
# 시간 필드 변환 없이 그대로 사용
|
| 171 |
if "startTime" in metadata and metadata["startTime"] is not None:
|
| 172 |
result["startTime"] = metadata["startTime"]
|
| 173 |
|
|
@@ -180,7 +179,7 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
|
|
| 180 |
print(f"λ¬Έμ κ° λ°μν λ©νλ°μ΄ν°: {metadata_json[:200]}...")
|
| 181 |
continue
|
| 182 |
|
| 183 |
-
# 임계값 필터링
|
| 184 |
filtered_results = [r for r in results if r["similarityScore"] >= threshold]
|
| 185 |
|
| 186 |
if len(filtered_results) > 0:
|
|
@@ -230,27 +229,7 @@ def search_similar_chat_by_date(
|
|
| 230 |
print(f"λ€μ€ μλ² λ© λ μ§ κ²μ μμ: 쿼리='{query}', μμμΌ={start_date}, μ’
λ£μΌ={end_date}, μ΅λ κ²°κ³Ό={limit}")
|
| 231 |
|
| 232 |
try:
|
| 233 |
-
# 날짜 필터
|
| 234 |
-
start_timestamp = None
|
| 235 |
-
end_timestamp = None
|
| 236 |
-
|
| 237 |
-
if start_date and start_date.strip():
|
| 238 |
-
try:
|
| 239 |
-
# 자바에서는 LocalDateTime.parse() 사용하므로 동일하게 구현
|
| 240 |
-
start_datetime = datetime.strptime(start_date + "T00:00:00", '%Y-%m-%dT%H:%M:%S')
|
| 241 |
-
start_timestamp = int(start_datetime.timestamp() * 1000) # λ°λ¦¬μ΄ λ¨μλ‘ λ³ν
|
| 242 |
-
except ValueError as e:
|
| 243 |
-
print(f"μμ λ μ§ νμ μ€λ₯: {str(e)}")
|
| 244 |
-
return []
|
| 245 |
-
|
| 246 |
-
if end_date and end_date.strip():
|
| 247 |
-
try:
|
| 248 |
-
# 자바에서는 LocalDateTime.parse() 사용하므로 동일하게 구현
|
| 249 |
-
end_datetime = datetime.strptime(end_date + "T23:59:59", '%Y-%m-%dT%H:%M:%S')
|
| 250 |
-
end_timestamp = int(end_datetime.timestamp() * 1000) # λ°λ¦¬μ΄ λ¨μλ‘ λ³ν
|
| 251 |
-
except ValueError as e:
|
| 252 |
-
print(f"μ’
λ£ λ μ§ νμ μ€λ₯: {str(e)}")
|
| 253 |
-
return []
|
| 254 |
|
| 255 |
# 쿼리 μλ² λ© μμ±
|
| 256 |
query_embedding = get_embedding(query)
|
|
@@ -280,12 +259,16 @@ def search_similar_chat_by_date(
|
|
| 280 |
OR agent_embedding IS NOT NULL
|
| 281 |
""" % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w)
|
| 282 |
|
| 283 |
-
# 날짜 필터 추가
|
| 284 |
-
if
|
| 285 |
-
|
|
|
|
|
|
|
| 286 |
|
| 287 |
-
if
|
| 288 |
-
|
|
|
|
|
|
|
| 289 |
|
| 290 |
sql += """
|
| 291 |
)
|
|
@@ -300,7 +283,7 @@ def search_similar_chat_by_date(
|
|
| 300 |
"""
|
| 301 |
|
| 302 |
with conn.cursor() as cur:
|
| 303 |
-
print(f"λ μ§ κ²μ 쿼리 μ€ν: μμμΌ={start_date}
|
| 304 |
# 여기서는 limit를 파라미터로 전달
|
| 305 |
cur.execute(sql, (limit,))
|
| 306 |
rows = cur.fetchall()
|
|
|
|
| 6 |
from typing import List, Dict, Tuple, Any
|
| 7 |
from pgvector.psycopg2 import register_vector
|
| 8 |
import numpy as np
|
| 9 |
+
from datetime import datetime, timezone, timedelta
|
| 10 |
|
| 11 |
# 가중치 및 임계값 설정
|
| 12 |
DEFAULT_FULL_WEIGHT = 0.2
|
|
|
|
| 43 |
try:
|
| 44 |
response = client.embeddings.create(
|
| 45 |
input=text,
|
| 46 |
+
model="text-embedding-ada-002",
|
| 47 |
+
encoding_format="float"
|
|
|
|
| 48 |
)
|
| 49 |
# 명시적으로 float32로 변환하여 Java의 float[]와 호환되게 함
|
| 50 |
return np.array(response.data[0].embedding, dtype=np.float32).tolist()
|
|
|
|
| 166 |
"topic": get_text_value(metadata, "topic")
|
| 167 |
}
|
| 168 |
|
| 169 |
+
# 시간 필드 변환 없이 그대로 사용
|
| 170 |
if "startTime" in metadata and metadata["startTime"] is not None:
|
| 171 |
result["startTime"] = metadata["startTime"]
|
| 172 |
|
|
|
|
| 179 |
print(f"λ¬Έμ κ° λ°μν λ©νλ°μ΄ν°: {metadata_json[:200]}...")
|
| 180 |
continue
|
| 181 |
|
| 182 |
+
# 임계값 필터링
|
| 183 |
filtered_results = [r for r in results if r["similarityScore"] >= threshold]
|
| 184 |
|
| 185 |
if len(filtered_results) > 0:
|
|
|
|
| 229 |
print(f"λ€μ€ μλ² λ© λ μ§ κ²μ μμ: 쿼리='{query}', μμμΌ={start_date}, μ’
λ£μΌ={end_date}, μ΅λ κ²°κ³Ό={limit}")
|
| 230 |
|
| 231 |
try:
|
| 232 |
+
# 날짜 필터 생성
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
# 쿼리 μλ² λ© μμ±
|
| 235 |
query_embedding = get_embedding(query)
|
|
|
|
| 259 |
OR agent_embedding IS NOT NULL
|
| 260 |
""" % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w)
|
| 261 |
|
| 262 |
+
# 날짜 필터 추가
|
| 263 |
+
if start_date and start_date.strip():
|
| 264 |
+
# 시작 시간 추가하여 ISO 형식으로 비교
|
| 265 |
+
iso_start_date = start_date + "T00:00:00"
|
| 266 |
+
sql += f" AND metadata->>'startTime' >= '{iso_start_date}'"
|
| 267 |
|
| 268 |
+
if end_date and end_date.strip():
|
| 269 |
+
# 종료 시간 추가하여 ISO 형식으로 비교
|
| 270 |
+
iso_end_date = end_date + "T23:59:59"
|
| 271 |
+
sql += f" AND metadata->>'startTime' <= '{iso_end_date}'"
|
| 272 |
|
| 273 |
sql += """
|
| 274 |
)
|
|
|
|
| 283 |
"""
|
| 284 |
|
| 285 |
with conn.cursor() as cur:
|
| 286 |
+
print(f"λ μ§ κ²μ 쿼리 μ€ν: μμμΌ={start_date}, μ’
λ£μΌ={end_date}")
|
| 287 |
# 여기서는 limit를 파라미터로 전달
|
| 288 |
cur.execute(sql, (limit,))
|
| 289 |
rows = cur.fetchall()
|