Robin Chiu committed on
Commit
e87f50f
·
1 Parent(s): 7ec1d7a

improve some tools.

Browse files
Files changed (2) hide show
  1. app.py +126 -49
  2. utils/tools.py +22 -11
app.py CHANGED
@@ -2,8 +2,13 @@ import gradio as gr
2
  import pandas as pd
3
  import sys
4
  import os
 
5
 
6
- from utils.tools import get_kb, get_schema, get_tables, get_meaning, execute_sqlite_query
 
 
 
 
7
 
8
  @gr.mcp.tool()
9
  def get_all_databases() -> list:
@@ -29,34 +34,63 @@ def kb_query(db_name, knowledge_keyword):
29
  Query the knowledge base for a specific database with optional keyword filtering.
30
 
31
  This function retrieves knowledge base information for a specified database.
32
- If a keyword is provided, it filters the results based on that keyword.
 
33
 
34
  Args:
35
  db_name (str): The name of the database to query. Must not be empty.
36
- knowledge_keyword (str): Optional keyword to filter knowledge base results.
 
37
  If empty or None, returns all knowledge for the database.
38
 
39
  Returns:
40
- pandas.DataFrame: Query results containing knowledge base data, or error message
41
- if no database is selected or no results found.
 
42
 
43
  Example:
44
- >>> result = kb_query("sales_db", "customer")
45
  >>> print(result)
46
- # Returns DataFrame with customer-related knowledge from sales_db
47
  """
48
  if not db_name:
49
- return pd.DataFrame({"message": ["請先選擇資料庫"]})
50
 
51
  if not knowledge_keyword:
52
  result = get_kb(db_name)
53
  else:
54
- result = get_kb(db_name, knowledge_keyword)
55
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  if len(result) == 0:
57
- return pd.DataFrame({"message": ["沒有找到相關知識"]})
58
-
59
- return result
 
 
 
 
 
 
 
 
 
 
60
 
61
  def schema_query(db_name, table_name):
62
  """
@@ -70,23 +104,29 @@ def schema_query(db_name, table_name):
70
  table_name (str): The name of the table to query schema for. Must not be empty.
71
 
72
  Returns:
73
- pandas.DataFrame: Query results containing table schema information, or error message
74
- if parameters are missing or no schema found.
 
75
 
76
  Example:
77
  >>> result = schema_query("sales_db", "customers")
78
  >>> print(result)
79
- # Returns DataFrame with column definitions for customers table
80
  """
81
  if not db_name or not table_name:
82
- return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})
83
 
84
- result = get_schema(db_name, table_name)
 
85
 
86
- if len(result) == 0:
87
- return pd.DataFrame({"message": ["沒有找到相關資料表結構"]})
88
 
89
- return result
 
 
 
 
90
 
91
  def tables_query(db_name):
92
  """
@@ -125,56 +165,93 @@ def meaning_query(db_name, table_name):
125
  table_name (str): The name of the table to query column meanings for. Must not be empty.
126
 
127
  Returns:
128
- pandas.DataFrame: Query results containing column meanings and descriptions,
129
- or error message if parameters are missing or no meanings found.
 
130
 
131
  Example:
132
  >>> result = meaning_query("sales_db", "customers")
133
  >>> print(result)
134
- # Returns DataFrame with explanations for each column in customers table
135
  """
136
  if not db_name or not table_name:
137
- return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})
138
 
139
  result = get_meaning(db_name, table_name)
140
 
141
  if len(result) == 0:
142
- return pd.DataFrame({"message": ["沒有找到相關欄位意義"]})
143
 
144
- return pd.DataFrame(result)
145
 
146
- def execute_sqlite(db_name: str, query: str):
147
  """
148
- Execute an SQL query on a specified SQLite database and return results.
149
-
150
- This function allows direct SQL query execution on the selected database,
151
- returning either the query results or error messages in a standardized format.
152
-
 
153
  Args:
154
- db_name (str): The name of the database to execute the query against. Must not be empty.
155
- query (str): The SQL query to execute. Must not be empty.
156
-
 
157
  Returns:
158
- pandas.DataFrame: If query executes successfully, returns DataFrame with query results.
159
- If query fails or parameters are missing, returns error message in DataFrame.
160
-
 
161
  Example:
162
- >>> result = execute_sqlite("sales_db", "SELECT * FROM customers LIMIT 10")
163
  >>> print(result)
164
- # Returns DataFrame with first 10 rows from customers table
 
 
 
 
165
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  if not db_name or not query:
167
  return pd.DataFrame({"message": ["請選擇資料庫和SQL"]})
168
 
169
  result = execute_sqlite_query(db_name, query)
170
 
171
- if len(result) == 0:
172
- return pd.DataFrame({"message": ["沒有找到相關資料"]})
173
 
174
  if result['success']:
175
- return pd.DataFrame(result['data'], columns=result['columns'])
 
176
  else:
177
- return pd.DataFrame({"message": [result['error']]})
178
 
179
 
180
  # 建立 Gradio 界面
@@ -215,13 +292,13 @@ with gr.Blocks(title="資料庫查詢工具") as demo:
215
  with gr.Tab("欄位意義查詢"):
216
  with gr.Row():
217
  meaning_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
218
- meaning_table = gr.Text(label="選擇資料表")
219
- meaning_search = gr.Button("查詢欄位意義")
220
  meaning_result = gr.DataFrame(label="查詢結果")
221
 
222
  # 當資料庫選擇變更時,更新資料表下拉選單
223
  # meaning_db.change(update_tables, inputs=meaning_db, outputs=meaning_table)
224
- meaning_search.click(meaning_query, inputs=[meaning_db, meaning_table], outputs=meaning_result)
225
 
226
  with gr.Tab("sqlite SQL 執行"):
227
  with gr.Row():
@@ -233,4 +310,4 @@ with gr.Blocks(title="資料庫查詢工具") as demo:
233
 
234
  # 啟動 Gradio 應用程式
235
  if __name__ == "__main__":
236
- demo.launch(mcp_server=True, server_name="0.0.0.0",allowed_paths=["/"], share=True)
 
2
  import pandas as pd
3
  import sys
4
  import os
5
+ import json
6
 
7
+ from utils.tools import get_kb, get_schema, get_tables, get_meaning, execute_sqlite_query, search_meaning
8
+
9
+
10
+ def get_message(msg):
11
+ return {"status": "error", "headers":['message'], "data": [[f"{msg}"]]}
12
 
13
  @gr.mcp.tool()
14
  def get_all_databases() -> list:
 
34
  Query the knowledge base for a specific database with optional keyword filtering.
35
 
36
  This function retrieves knowledge base information for a specified database.
37
+ If keywords are provided, it filters the results based on those keywords.
38
+ Multiple keywords can be separated by commas, 'or', or spaces.
39
 
40
  Args:
41
  db_name (str): The name of the database to query. Must not be empty.
42
+ knowledge_keyword (str): Optional keywords to filter knowledge base results.
43
+ Multiple keywords can be separated by commas, 'or', or spaces.
44
  If empty or None, returns all knowledge for the database.
45
 
46
  Returns:
47
+ dict: Dictionary containing query results with structure:
48
+ - Success: {"status": "success", "headers": [...], "data": [...]}
49
+ - Error: {"status": "error", "headers": ['message'], "data": [['Error message.']]}
50
 
51
  Example:
52
+ >>> result = kb_query("sales_db", "customer, order")
53
  >>> print(result)
54
+ # Returns dict with customer and order-related knowledge from sales_db
55
  """
56
  if not db_name:
57
+ return get_message("Please select db_name.")
58
 
59
  if not knowledge_keyword:
60
  result = get_kb(db_name)
61
  else:
62
+ # Split keywords by comma, 'or', or space
63
+ keywords = [k.strip() for k in knowledge_keyword.replace('or', ',').replace(' ', ',').split(',') if k.strip()]
64
+
65
+ # Get results for each keyword and combine them
66
+ combined_results = []
67
+ for keyword in keywords:
68
+ keyword_result = get_kb(db_name, keyword)
69
+ # 使用 append 而不是 extend,因為 get_kb 返回 DataFrame
70
+ if not keyword_result.empty:
71
+ combined_results.append(keyword_result)
72
+
73
+ # Remove duplicates if any
74
+ # Convert DataFrames to a single DataFrame and remove duplicate rows
75
+ if combined_results:
76
+ result = pd.concat(combined_results, ignore_index=True).drop_duplicates()
77
+ else:
78
+ result = pd.DataFrame()
79
+
80
  if len(result) == 0:
81
+ return get_message("Not Found.")
82
+
83
+ # Convert DataFrame to JSON format with columns and rows
84
+ data = {
85
+ "columns": result.columns.tolist(),
86
+ "rows": result.values.tolist()
87
+ }
88
+ return {"status": "success", "headers": result.columns.tolist(), "data":result.values.tolist()}
89
+ # return json.dumps(
90
+ # {"status": "success", "data": data},
91
+ # ensure_ascii=False,
92
+ # indent=2
93
+ # )
94
 
95
  def schema_query(db_name, table_name):
96
  """
 
104
  table_name (str): The name of the table to query schema for. Must not be empty.
105
 
106
  Returns:
107
+ dict: Dictionary containing query results with structure:
108
+ - Success: {"status": "success", "headers": [...], "data": [...]}
109
+ - Error: {"status": "error", "headers": ['message'], "data": [['Error message.']]}
110
 
111
  Example:
112
  >>> result = schema_query("sales_db", "customers")
113
  >>> print(result)
114
+ # Returns dict with column definitions for customers table
115
  """
116
  if not db_name or not table_name:
117
+ return get_message("請選擇資料庫和資料表")
118
 
119
+ # 取得 schema 資訊
120
+ schema_result = get_schema(db_name, table_name)
121
 
122
+ if len(schema_result) == 0:
123
+ return get_message("Not Found.")
124
 
125
+ return {
126
+ "status": "success",
127
+ "headers": schema_result.columns.tolist(),
128
+ "data": schema_result.values.tolist()
129
+ }
130
 
131
  def tables_query(db_name):
132
  """
 
165
  table_name (str): The name of the table to query column meanings for. Must not be empty.
166
 
167
  Returns:
168
+ dict: Dictionary containing query results with structure:
169
+ - Success: {"status": "success", "headers": [...], "data": [...]}
170
+ - Error: {"status": "error", "headers": ['message'], "data": [['Error message.']]}
171
 
172
  Example:
173
  >>> result = meaning_query("sales_db", "customers")
174
  >>> print(result)
175
+ # Returns dict with explanations for each column in customers table
176
  """
177
  if not db_name or not table_name:
178
+ return get_message("請選擇資料庫和資料表")
179
 
180
  result = get_meaning(db_name, table_name)
181
 
182
  if len(result) == 0:
183
+ return get_message("Not Found.")
184
 
185
+ return {"status": "success", "headers": result.columns.tolist(), "data": result.values.tolist()}
186
 
187
+ def meaning_search(db_name, keyword):
188
  """
189
+ Search for column meanings using one or more keywords in a specific database.
190
+
191
+ This function searches for columns whose names or meanings contain the specified
192
+ keyword(s), helping users find relevant fields across all tables in the database.
193
+ Multiple keywords can be separated by commas or spaces.
194
+
195
  Args:
196
+ db_name (str): The name of the database to search in. Must not be empty.
197
+ keyword (str): The search keyword(s) to match against column names or meanings.
198
+ Multiple keywords can be separated by commas or spaces. Must not be empty.
199
+
200
  Returns:
201
+ dict: Dictionary containing query results with structure:
202
+ - Success: {"status": "success", "headers": [...], "data": [...]}
203
+ - Error: {"status": "error", "headers": ['message'], "data": [['Error message.']]}
204
+
205
  Example:
206
+ >>> result = meaning_search("sales_db", "customer")
207
  >>> print(result)
208
+ # Returns dict with all columns containing "customer" in their name or meaning
209
+
210
+ >>> result = meaning_search("sales_db", "customer, order, product")
211
+ >>> print(result)
212
+ # Returns dict with all columns containing "customer", "order", or "product" in their name or meaning
213
  """
214
+ if not db_name or not keyword:
215
+ return get_message("Please input keyword.")
216
+
217
+ # Split keywords by comma or space
218
+ keywords = [k.strip() for k in keyword.replace(' ', ',').split(',') if k.strip()]
219
+
220
+ # Get results for each keyword and combine them
221
+ combined_results = []
222
+ for kw in keywords:
223
+ keyword_result = search_meaning(db_name, kw)
224
+ # Append non-empty results
225
+ if not keyword_result.empty:
226
+ combined_results.append(keyword_result)
227
+
228
+ # Remove duplicates if any
229
+ # Convert DataFrames to a single DataFrame and remove duplicate rows
230
+ if combined_results:
231
+ result = pd.concat(combined_results, ignore_index=True).drop_duplicates()
232
+ else:
233
+ result = pd.DataFrame()
234
+
235
+ if len(result) == 0:
236
+ return get_message("Not Found.")
237
+
238
+ return {"status": "success", "headers": result.columns.tolist(), "data": result.values.tolist()}
239
+
240
+ def execute_sqlite(db_name: str, query: str):
241
+
242
  if not db_name or not query:
243
  return pd.DataFrame({"message": ["請選擇資料庫和SQL"]})
244
 
245
  result = execute_sqlite_query(db_name, query)
246
 
247
+ # if len(result) == 0:
248
+ # return pd.DataFrame({"message": ["沒有找到相關資料"]})
249
 
250
  if result['success']:
251
+ # return pd.DataFrame(result['data'], columns=result['columns'])
252
+ return pd.DataFrame({"type":["OK"], "message": ["Success"]})
253
  else:
254
+ return pd.DataFrame({"type":["Error"], "message": [result['error']]})
255
 
256
 
257
  # 建立 Gradio 界面
 
292
  with gr.Tab("欄位意義查詢"):
293
  with gr.Row():
294
  meaning_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
295
+ meaning_keyword = gr.Text(label="Keyword")
296
+ search_button = gr.Button("查詢欄位意義")
297
  meaning_result = gr.DataFrame(label="查詢結果")
298
 
299
  # 當資料庫選擇變更時,更新資料表下拉選單
300
  # meaning_db.change(update_tables, inputs=meaning_db, outputs=meaning_table)
301
+ search_button.click(meaning_search, inputs=[meaning_db, meaning_keyword], outputs=meaning_result)
302
 
303
  with gr.Tab("sqlite SQL 執行"):
304
  with gr.Row():
 
310
 
311
  # 啟動 Gradio 應用程式
312
  if __name__ == "__main__":
313
+ demo.launch(mcp_server=True, server_name="0.0.0.0", allowed_paths=["/"], share=True)
utils/tools.py CHANGED
@@ -7,15 +7,15 @@ import shutil
7
  import uuid
8
 
9
 
10
- kb_df = pd.read_csv("./data/kb.csv")
11
  def get_kb(db_name, knowledge=None):
12
  if not knowledge:
13
  result = kb_df[(kb_df['db_name']==db_name)]
14
  else:
15
- result = kb_df[(kb_df['db_name']==db_name) & (kb_df['knowledge'].str.contains(knowledge))]
16
  return result
17
 
18
- schema_df = pd.read_csv("./data/db_schema.csv")
19
  def get_schema(db_name, table_name):
20
  result = schema_df[(schema_df['db_name']==db_name) & (schema_df['table_name']==table_name)]
21
  result = result[['schema', 'sample_data']]
@@ -27,10 +27,20 @@ def get_tables(db_name):
27
  tables = result['table_name'].to_list()
28
  return tables
29
 
30
- meaning_df = pd.read_csv("./data/column_meanings.csv")
31
  def get_meaning(db_name, table_name):
32
  result = meaning_df[(meaning_df['db_name']==db_name) & (meaning_df['table_name']==table_name)]
33
- result = result[['column_name', 'meaning']]
 
 
 
 
 
 
 
 
 
 
34
  return result
35
 
36
  def execute_sqlite_query(db_name, query):
@@ -67,12 +77,13 @@ def execute_sqlite_query(db_name, query):
67
  conn = sqlite3.connect(tmp_db_file)
68
  cursor = conn.cursor()
69
 
70
- # 將查詢按分號分開並執行每條查詢
71
- queries = query.split(';')
72
- for q in queries:
73
- q = q.strip()
74
- if q: # 確保不執行空查詢
75
- cursor.execute(q)
 
76
 
77
  # 獲取結果
78
  try:
 
7
  import uuid
8
 
9
 
10
+ kb_df = pd.read_csv("./data/kb.csv", index_col=0)
11
  def get_kb(db_name, knowledge=None):
12
  if not knowledge:
13
  result = kb_df[(kb_df['db_name']==db_name)]
14
  else:
15
+ result = kb_df[(kb_df['db_name']==db_name) & (kb_df['knowledge'].str.contains(knowledge, case=False))]
16
  return result
17
 
18
+ schema_df = pd.read_csv("./data/db_schema.csv", index_col=0)
19
  def get_schema(db_name, table_name):
20
  result = schema_df[(schema_df['db_name']==db_name) & (schema_df['table_name']==table_name)]
21
  result = result[['schema', 'sample_data']]
 
27
  tables = result['table_name'].to_list()
28
  return tables
29
 
30
+ meaning_df = pd.read_csv("./data/column_meanings.csv", index_col=0)
31
  def get_meaning(db_name, table_name):
32
  result = meaning_df[(meaning_df['db_name']==db_name) & (meaning_df['table_name']==table_name)]
33
+ result = result[['table_name', 'column_name', 'meaning']]
34
+ return result
35
+
36
+ def search_meaning(db_name, keyword):
37
+ # Search in meaning column
38
+ result_meaning = meaning_df[(meaning_df['db_name']==db_name) & (meaning_df['meaning'].str.contains(keyword, case=False))]
39
+ # Search in column_name column
40
+ result_column = meaning_df[(meaning_df['db_name']==db_name) & (meaning_df['column_name'].str.contains(keyword, case=False))]
41
+ # Combine results and remove duplicates
42
+ result = pd.concat([result_meaning, result_column]).drop_duplicates()
43
+ result = result[['table_name', 'column_name', 'meaning']]
44
  return result
45
 
46
  def execute_sqlite_query(db_name, query):
 
77
  conn = sqlite3.connect(tmp_db_file)
78
  cursor = conn.cursor()
79
 
80
+ # # 將查詢按分號分開並執行每條查詢
81
+ # queries = query.split(';')
82
+ # for q in queries:
83
+ # q = q.strip()
84
+ # if q: # 確保不執行空查詢
85
+ # cursor.execute(q)
86
+ cursor.executescript(query)
87
 
88
  # 獲取結果
89
  try: