Jake-seong committed on
Commit
269f105
·
verified ·
1 Parent(s): 105bd50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -90
app.py CHANGED
@@ -26,18 +26,16 @@ def get_embedding(text: str) -> List[float]:
26
  """
27
  response = client.embeddings.create(
28
  input=text,
29
- model="text-embedding-ada-002"
30
  )
31
  return response.data[0].embedding
32
 
33
- def search_similar_chats(query: str, maxResults: int = 30000) -> List[Dict]:
34
  """
35
  유사한 채팅 문서를 검색합니다.
36
-
37
  Args:
38
  query (str): 검색할 쿼리 텍스트
39
  maxResults (int): 반환할 최대 결과 수
40
-
41
  Returns:
42
  List[Dict]: 검색 결과 목록
43
  """
@@ -48,7 +46,6 @@ def search_similar_chats(query: str, maxResults: int = 30000) -> List[Dict]:
48
  cur.execute("""
49
  SELECT id, metadata, content, embedding <#> %s AS distance
50
  FROM vector_store
51
- WHERE metadata->>'documentType' = 'chatAnalysis'
52
  ORDER BY embedding <#> %s
53
  LIMIT %s
54
  """, (embedding, embedding, maxResults))
@@ -64,22 +61,20 @@ def search_similar_chats(query: str, maxResults: int = 30000) -> List[Dict]:
64
  for row in rows
65
  ]
66
 
67
- def search_chats_by_category_and_date(
68
- category: str,
69
  startDate: str = None,
70
  endDate: str = None,
71
- maxResults: int = 30000
72
  ) -> List[Dict]:
73
  """
74
- 카테고리와 지정된 날짜 범위에 해당하는 채팅 문서를 검색합니다.
75
- (검색 기간은 최대 31일까지 허용)
76
-
77
  Args:
78
- category (str): 카테고리명
79
  startDate (str): 검색 시작 날짜 (YYYY-MM-DD)
80
  endDate (str): 검색 종료 날짜 (YYYY-MM-DD)
81
  maxResults (int): 반환할 최대 결과 수
82
-
83
  Returns:
84
  List[Dict]: 검색 결과 목록
85
  """
@@ -87,97 +82,47 @@ def search_chats_by_category_and_date(
87
  try:
88
  start_dt = datetime.strptime(startDate, "%Y-%m-%d")
89
  end_dt = datetime.strptime(endDate, "%Y-%m-%d")
90
- if (end_dt - start_dt).days > 31:
91
- raise ValueError("검색 기간은 최대 31일까지 허용됩니다.")
92
  except Exception as e:
93
- raise ValueError(f"날짜 형식이 올바르지 않거나, 기간이 너무 깁니다: {e}")
94
- query = """
95
- SELECT id, metadata, content
96
- FROM vector_store
97
- WHERE metadata->>'documentType' = 'chatAnalysis'
98
- AND metadata->>'category' = %s
99
- AND (metadata->>'startTime') IS NOT NULL
100
- AND (metadata->>'startTime') <> ''
101
- """
102
- params = [category]
103
- if startDate not in (None, ""):
104
- query += " AND (metadata->>'startTime')::timestamp >= %s"
105
- params.append(startDate)
106
- if endDate not in (None, ""):
107
- query += " AND (metadata->>'startTime')::timestamp <= %s"
108
- params.append(endDate)
109
- query += " LIMIT %s"
110
- params.append(maxResults)
111
  conn = get_db_conn()
112
- with conn.cursor() as cur:
113
- cur.execute(query, tuple(params))
114
- rows = cur.fetchall()
115
- conn.close()
116
- return [
117
- {
118
- "id": row[0],
119
- "metadata": row[1],
120
- "content": row[2]
121
- }
122
- for row in rows
123
- ]
124
-
125
- def search_chats_by_product_and_date(
126
- productName: str = None,
127
- startDate: str = None,
128
- endDate: str = None,
129
- maxResults: int = 30000
130
- ) -> List[Dict]:
131
- """
132
- 상품명과 지정된 날짜 범위에 해당하는 채팅 문서를 검색합니다.
133
- (검색 기간은 최대 31일까지 허용)
134
-
135
- Args:
136
- productName (str): 상품명(옵션)
137
- startDate (str): 검색 시작 날짜 (YYYY-MM-DD)
138
- endDate (str): 검색 종료 날짜 (YYYY-MM-DD)
139
- maxResults (int): 반환할 최대 결과 수
140
-
141
- Returns:
142
- List[Dict]: 검색 결과 목록
143
- """
144
- if startDate not in (None, "") and endDate not in (None, ""):
145
- try:
146
- start_dt = datetime.strptime(startDate, "%Y-%m-%d")
147
- end_dt = datetime.strptime(endDate, "%Y-%m-%d")
148
- if (end_dt - start_dt).days > 31:
149
- raise ValueError("๊ฒ€์ƒ‰ ๊ธฐ๊ฐ„์€ ์ตœ๋Œ€ 31์ผ๊นŒ์ง€ ํ—ˆ์šฉ๋ฉ๋‹ˆ๋‹ค.")
150
- except Exception as e:
151
- raise ValueError(f"๋‚ ์งœ ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š๊ฑฐ๋‚˜, ๊ธฐ๊ฐ„์ด ๋„ˆ๋ฌด ๊น๋‹ˆ๋‹ค: {e}")
152
- query = """
153
- SELECT id, metadata, content
154
  FROM vector_store
155
- WHERE metadata->>'documentType' = 'chatAnalysis'
156
- AND (metadata->>'startTime') IS NOT NULL
157
  AND (metadata->>'startTime') <> ''
158
  """
159
- params = []
160
- if productName not in (None, ""):
161
- query += " AND (metadata->>'productName') = %s"
162
- params.append(productName)
163
  if startDate not in (None, ""):
164
- query += " AND (metadata->>'startTime')::timestamp >= %s"
165
  params.append(startDate)
 
166
  if endDate not in (None, ""):
167
- query += " AND (metadata->>'startTime')::timestamp <= %s"
168
  params.append(endDate)
169
- query += " LIMIT %s"
170
- params.append(maxResults)
171
- conn = get_db_conn()
 
 
172
  with conn.cursor() as cur:
173
- cur.execute(query, tuple(params))
174
  rows = cur.fetchall()
175
  conn.close()
 
176
  return [
177
  {
178
  "id": row[0],
179
  "metadata": row[1],
180
- "content": row[2]
 
181
  }
182
  for row in rows
183
  ]
@@ -186,8 +131,7 @@ def search_chats_by_product_and_date(
186
  with gr.Blocks() as demo:
187
  gr.Markdown("# Chat Analysis Search")
188
  gr.Interface(fn=search_similar_chats, inputs=["text", "number"], outputs="json", api_name="search_similar_chats")
189
- gr.Interface(fn=search_chats_by_category_and_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_chats_by_category_and_date")
190
- gr.Interface(fn=search_chats_by_product_and_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_chats_by_product_and_date")
191
 
192
  if __name__ == "__main__":
193
  demo.launch(mcp_server=True)
 
26
  """
27
  response = client.embeddings.create(
28
  input=text,
29
+ model="text-embedding-3-small"
30
  )
31
  return response.data[0].embedding
32
 
33
+ def search_similar_chats(query: str, maxResults: int = 10000) -> List[Dict]:
34
  """
35
  유사한 채팅 문서를 검색합니다.
 
36
  Args:
37
  query (str): 검색할 쿼리 텍스트
38
  maxResults (int): 반환할 최대 결과 수
 
39
  Returns:
40
  List[Dict]: 검색 결과 목록
41
  """
 
46
  cur.execute("""
47
  SELECT id, metadata, content, embedding <#> %s AS distance
48
  FROM vector_store
 
49
  ORDER BY embedding <#> %s
50
  LIMIT %s
51
  """, (embedding, embedding, maxResults))
 
61
  for row in rows
62
  ]
63
 
64
+ def search_similar_chats_by_date(
65
+ query: str,
66
  startDate: str = None,
67
  endDate: str = None,
68
+ maxResults: int = 10000
69
  ) -> List[Dict]:
70
  """
71
+ 지정된 날짜 범위에 해당하는 유사한 채팅 문서를 검색합니다.
72
+
 
73
  Args:
74
+ query (str): 검색 쿼리
75
  startDate (str): 검색 시작 날짜 (YYYY-MM-DD)
76
  endDate (str): 검색 종료 날짜 (YYYY-MM-DD)
77
  maxResults (int): 반환할 최대 결과 수
 
78
  Returns:
79
  List[Dict]: 검색 결과 목록
80
  """
 
82
  try:
83
  start_dt = datetime.strptime(startDate, "%Y-%m-%d")
84
  end_dt = datetime.strptime(endDate, "%Y-%m-%d")
 
 
85
  except Exception as e:
86
+ raise ValueError(f"날짜 형식이 올바르지 않습니다.: {e}")
87
+
88
+ embedding = np.array(get_embedding(query)) # numpy array๋กœ ๋ณ€ํ™˜
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  conn = get_db_conn()
90
+ register_vector(conn) # 벡터 타입 자동 변환 지원
91
+
92
+ # SQL 쿼리 구성
93
+ sql_query = """
94
+ SELECT id, metadata, content, embedding <#> %s AS distance
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  FROM vector_store
96
+ WHERE (metadata->>'startTime') IS NOT NULL
 
97
  AND (metadata->>'startTime') <> ''
98
  """
99
+
100
+ params = [embedding]
101
+
102
+ # 날짜 필터 추가
103
  if startDate not in (None, ""):
104
+ sql_query += " AND (metadata->>'startTime')::timestamp >= %s"
105
  params.append(startDate)
106
+
107
  if endDate not in (None, ""):
108
+ sql_query += " AND (metadata->>'startTime')::timestamp <= %s"
109
  params.append(endDate)
110
+
111
+ # 벡터 거리로 정렬하고 결과 제한
112
+ sql_query += " ORDER BY embedding <#> %s LIMIT %s"
113
+ params.extend([embedding, maxResults])
114
+
115
  with conn.cursor() as cur:
116
+ cur.execute(sql_query, tuple(params))
117
  rows = cur.fetchall()
118
  conn.close()
119
+
120
  return [
121
  {
122
  "id": row[0],
123
  "metadata": row[1],
124
+ "content": row[2],
125
+ "distance": row[3]
126
  }
127
  for row in rows
128
  ]
 
131
  with gr.Blocks() as demo:
132
  gr.Markdown("# Chat Analysis Search")
133
  gr.Interface(fn=search_similar_chats, inputs=["text", "number"], outputs="json", api_name="search_similar_chats")
134
+ gr.Interface(fn=search_similar_chats_by_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_similar_chats_by_date")
 
135
 
136
  if __name__ == "__main__":
137
  demo.launch(mcp_server=True)