Jake-seong committed on
Commit
6a97a0f
·
verified ·
1 Parent(s): 54857b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -33
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  from typing import List, Dict, Tuple, Any
7
  from pgvector.psycopg2 import register_vector
8
  import numpy as np
9
- from datetime import datetime, timezone
10
 
11
  # 가중치 및 임계값 설정
12
  DEFAULT_FULL_WEIGHT = 0.2
@@ -43,9 +43,8 @@ def get_embedding(text: str) -> List[float]:
43
  try:
44
  response = client.embeddings.create(
45
  input=text,
46
- model="text-embedding-3-small",
47
- encoding_format="float",
48
- dimensions=1536
49
  )
50
  # 명시적으로 float32로 변환하여 Java의 float[]와 호환되게 함
51
  return np.array(response.data[0].embedding, dtype=np.float32).tolist()
@@ -167,7 +166,7 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
167
  "topic": get_text_value(metadata, "topic")
168
  }
169
 
170
- # 시간 필드 변환 없이 그대로 사용 (이미 KST로 저장되어 있음)
171
  if "startTime" in metadata and metadata["startTime"] is not None:
172
  result["startTime"] = metadata["startTime"]
173
 
@@ -180,7 +179,7 @@ def search_similar_chat(query: str, max_results: int = 100) -> List[Dict]:
180
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
181
  continue
182
 
183
- # 임계값 필터링 (자바 코드와 동일하게 구현)
184
  filtered_results = [r for r in results if r["similarityScore"] >= threshold]
185
 
186
  if len(filtered_results) > 0:
@@ -230,27 +229,7 @@ def search_similar_chat_by_date(
230
  print(f"닀쀑 μž„λ² λ”© λ‚ μ§œ 검색 μ‹œμž‘: 쿼리='{query}', μ‹œμž‘μΌ={start_date}, μ’…λ£ŒμΌ={end_date}, μ΅œλŒ€ κ²°κ³Ό={limit}")
231
 
232
  try:
233
- # 날짜 필터 파라미터 생성 (자바 코드와 동일하게 구현)
234
- start_timestamp = None
235
- end_timestamp = None
236
-
237
- if start_date and start_date.strip():
238
- try:
239
- # 자바에서는 LocalDateTime.parse() 사용하므로 동일하게 구현
240
- start_datetime = datetime.strptime(start_date + "T00:00:00", '%Y-%m-%dT%H:%M:%S')
241
- start_timestamp = int(start_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
242
- except ValueError as e:
243
- print(f"μ‹œμž‘ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
244
- return []
245
-
246
- if end_date and end_date.strip():
247
- try:
248
- # 자바에서는 LocalDateTime.parse() 사용하므로 동일하게 구현
249
- end_datetime = datetime.strptime(end_date + "T23:59:59", '%Y-%m-%dT%H:%M:%S')
250
- end_timestamp = int(end_datetime.timestamp() * 1000) # λ°€λ¦¬μ΄ˆ λ‹¨μœ„λ‘œ λ³€ν™˜
251
- except ValueError as e:
252
- print(f"μ’…λ£Œ λ‚ μ§œ ν˜•μ‹ 였λ₯˜: {str(e)}")
253
- return []
254
 
255
  # 쿼리 임베딩 생성
256
  query_embedding = get_embedding(query)
@@ -280,12 +259,16 @@ def search_similar_chat_by_date(
280
  OR agent_embedding IS NOT NULL
281
  """ % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w)
282
 
283
- # 날짜 필터 추가 (자바 코드와 동일하게 구현)
284
- if start_timestamp is not None:
285
- sql += f" AND (metadata->>'startTime')::bigint >= {start_timestamp}"
 
 
286
 
287
- if end_timestamp is not None:
288
- sql += f" AND (metadata->>'startTime')::bigint <= {end_timestamp}"
 
 
289
 
290
  sql += """
291
  )
@@ -300,7 +283,7 @@ def search_similar_chat_by_date(
300
  """
301
 
302
  with conn.cursor() as cur:
303
- print(f"λ‚ μ§œ 검색 쿼리 μ‹€ν–‰: μ‹œμž‘μΌ={start_date}({start_timestamp}), μ’…λ£ŒμΌ={end_date}({end_timestamp})")
304
  # 여기서는 limit를 파라미터로 전달
305
  cur.execute(sql, (limit,))
306
  rows = cur.fetchall()
 
6
  from typing import List, Dict, Tuple, Any
7
  from pgvector.psycopg2 import register_vector
8
  import numpy as np
9
+ from datetime import datetime, timezone, timedelta
10
 
11
  # 가중치 및 임계값 설정
12
  DEFAULT_FULL_WEIGHT = 0.2
 
43
  try:
44
  response = client.embeddings.create(
45
  input=text,
46
+ model="text-embedding-ada-002",
47
+ encoding_format="float"
 
48
  )
49
  # 명시적으로 float32로 변환하여 Java의 float[]와 호환되게 함
50
  return np.array(response.data[0].embedding, dtype=np.float32).tolist()
 
166
  "topic": get_text_value(metadata, "topic")
167
  }
168
 
169
+ # 시간 필드 변환 없이 그대로 사용
170
  if "startTime" in metadata and metadata["startTime"] is not None:
171
  result["startTime"] = metadata["startTime"]
172
 
 
179
  print(f"λ¬Έμ œκ°€ λ°œμƒν•œ 메타데이터: {metadata_json[:200]}...")
180
  continue
181
 
182
+ # 임계값 필터링
183
  filtered_results = [r for r in results if r["similarityScore"] >= threshold]
184
 
185
  if len(filtered_results) > 0:
 
229
  print(f"닀쀑 μž„λ² λ”© λ‚ μ§œ 검색 μ‹œμž‘: 쿼리='{query}', μ‹œμž‘μΌ={start_date}, μ’…λ£ŒμΌ={end_date}, μ΅œλŒ€ κ²°κ³Ό={limit}")
230
 
231
  try:
232
+ # 날짜 필터 생성
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  # 쿼리 임베딩 생성
235
  query_embedding = get_embedding(query)
 
259
  OR agent_embedding IS NOT NULL
260
  """ % (query_vector, full_w, query_vector, topic_w, query_vector, customer_w, query_vector, agent_w)
261
 
262
+ # 날짜 필터 추가
263
+ if start_date and start_date.strip():
264
+ # 시작 시간 추가하여 ISO 형식으로 비교
265
+ iso_start_date = start_date + "T00:00:00"
266
+ sql += f" AND metadata->>'startTime' >= '{iso_start_date}'"
267
 
268
+ if end_date and end_date.strip():
269
+ # 종료 시간 추가하여 ISO 형식으로 비교
270
+ iso_end_date = end_date + "T23:59:59"
271
+ sql += f" AND metadata->>'startTime' <= '{iso_end_date}'"
272
 
273
  sql += """
274
  )
 
283
  """
284
 
285
  with conn.cursor() as cur:
286
+ print(f"λ‚ μ§œ 검색 쿼리 μ‹€ν–‰: μ‹œμž‘μΌ={start_date}, μ’…λ£ŒμΌ={end_date}")
287
  # 여기서는 limit를 파라미터로 전달
288
  cur.execute(sql, (limit,))
289
  rows = cur.fetchall()