Spaces:
Paused
Paused
| import gradio as gr | |
| import psycopg2 | |
| from openai import OpenAI | |
| import json | |
| import os | |
| from typing import List, Dict | |
| from pgvector.psycopg2 import register_vector | |
| import numpy as np | |
| from datetime import datetime | |
# DB connection settings
def get_db_conn():
    """Open a new PostgreSQL connection from VECTOR_* environment variables.

    Returns:
        A fresh psycopg2 connection; the caller is responsible for closing it.

    Raises:
        KeyError: if a required VECTOR_* environment variable is missing.
        psycopg2.OperationalError: if the connection cannot be established.
    """
    return psycopg2.connect(
        host=os.environ["VECTOR_HOST"],
        # Previously hard-coded to 5432; VECTOR_PORT may override it,
        # and the old default is kept for backward compatibility.
        port=int(os.environ.get("VECTOR_PORT", "5432")),
        dbname=os.environ["VECTOR_DBNAME"],
        user=os.environ["VECTOR_USER"],
        password=os.environ["VECTOR_SECRET"],
    )
| client = OpenAI() | |
def get_embedding(text: str) -> List[float]:
    """Embed *text* with OpenAI's text-embedding-3-small model.

    Returns the embedding vector as a list of floats.
    """
    result = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    return result.data[0].embedding
def search_similar_chats(query: str, maxResults: int = 200) -> List[Dict]:
    """Search the vector store for chat documents similar to *query*.

    Args:
        query (str): free-text query; embedded and compared against stored vectors.
        maxResults (int): maximum number of rows to return.

    Returns:
        List[Dict]: one dict per row with keys "id", "metadata", "content",
        and "similarity" (cosine similarity, highest first).

    Raises:
        RuntimeError: if the database query fails (original error chained).
    """
    embedding = np.array(get_embedding(query))
    conn = get_db_conn()
    register_vector(conn)
    try:
        with conn.cursor() as cur:
            # <=> is pgvector's cosine-distance operator; 1 - distance = similarity.
            cur.execute("""
                SELECT id, metadata, content,
                       1 - (embedding <=> %s) AS similarity
                FROM vector_store
                ORDER BY similarity DESC
                LIMIT %s
            """, (embedding, maxResults))
            rows = cur.fetchall()
            return [{
                "id": row[0],
                "metadata": row[1],
                "content": row[2],
                "similarity": float(row[3])
            } for row in rows]
    except Exception as e:
        # Chain the cause so the underlying psycopg2 error survives the re-raise.
        raise RuntimeError(f"DB ๊ฒ์ ์ค๋ฅ: {str(e)}") from e
    finally:
        conn.close()
def search_similar_chats_by_date(
    query: str,
    startDate: str = None,
    endDate: str = None,
    maxResults: int = 200
) -> List[Dict]:
    """Search for similar chat documents restricted to a date range.

    Args:
        query (str): free-text query; embedded and compared against stored vectors.
        startDate (str): inclusive start date (YYYY-MM-DD), or None for no lower bound.
        endDate (str): inclusive end date (YYYY-MM-DD), or None for no upper bound.
        maxResults (int): maximum number of rows to return.

    Returns:
        List[Dict]: one dict per row with keys "id", "metadata", "content",
        and "similarity" (cosine similarity, highest first).

    Raises:
        ValueError: if a date string is not in YYYY-MM-DD format.
        RuntimeError: if the database query fails (original error chained).
    """
    # Validate formats up front; the raw strings (not the parsed values)
    # are what get passed to SQL, so no bindings are kept.
    try:
        if startDate:
            datetime.strptime(startDate, "%Y-%m-%d")
        if endDate:
            datetime.strptime(endDate, "%Y-%m-%d")
    except ValueError as e:
        raise ValueError(f"๋ ์ง ํ์ ์ค๋ฅ: {e}") from e
    embedding = np.array(get_embedding(query))
    conn = get_db_conn()
    register_vector(conn)
    try:
        with conn.cursor() as cur:
            base_query = """
                SELECT id, metadata, content,
                       1 - (embedding <=> %s) AS similarity
                FROM vector_store
                WHERE 1=1
            """
            params = [embedding]
            # Build the WHERE clause dynamically; values stay parameterized.
            if startDate:
                base_query += " AND (metadata->>'startTime')::date >= %s"
                params.append(startDate)
            if endDate:
                base_query += " AND (metadata->>'startTime')::date <= %s"
                params.append(endDate)
            base_query += " ORDER BY similarity DESC LIMIT %s"
            params.append(maxResults)
            cur.execute(base_query, tuple(params))
            rows = cur.fetchall()
            return [{
                "id": row[0],
                "metadata": row[1],
                "content": row[2],
                "similarity": float(row[3])
            } for row in rows]
    except Exception as e:
        # Chain the cause so the underlying psycopg2 error survives the re-raise.
        raise RuntimeError(f"DB ๊ฒ์ ์ค๋ฅ: {str(e)}") from e
    finally:
        conn.close()
# Register the search functions with Gradio Blocks.
with gr.Blocks() as demo:
    gr.Markdown("# Chat Analysis Search")
    # api_name pins the endpoint path used by API/MCP clients.
    gr.Interface(fn=search_similar_chats, inputs=["text", "number"], outputs="json", api_name="search_similar_chats")
    gr.Interface(fn=search_similar_chats_by_date, inputs=["text", "text", "text", "number"], outputs="json", api_name="search_similar_chats_by_date")

if __name__ == "__main__":
    # mcp_server=True additionally exposes the registered functions as MCP tools.
    demo.launch(mcp_server=True)