Robin Chiu committed on
Commit
ed2fe48
·
1 Parent(s): 07cc8e5

add the data and utils.

Browse files
app.py CHANGED
@@ -1,244 +1,193 @@
1
- # %%
2
- import requests
3
- from bs4 import BeautifulSoup
4
  import gradio as gr
 
 
 
 
 
5
 
6
- def parse_news_item(html: str) -> dict:
 
7
  """
8
- Parse HTML of a news item to extract link, time, headline, and text.
 
 
9
 
10
- Args:
11
- html: The HTML string of a news item.
12
-
13
  Returns:
14
- A dictionary containing link, time, headline, and text.
15
-
16
- Raises:
17
- Exception: For parsing errors or other unexpected errors.
 
 
18
  """
19
- try:
20
- soup = BeautifulSoup(html, "html.parser")
21
-
22
- # Get the anchor tag containing the link
23
- link_tag = soup.find("a", href=True)
24
- link = link_tag["href"] if link_tag else None
25
-
26
- # Get the headline inside <h3>
27
- headline_tag = soup.find("h3", class_="story__headline")
28
- headline = headline_tag.get_text(strip=True) if headline_tag else None
29
 
30
- # Get the text inside <p>
31
- text_tag = soup.find("p", class_="story__text")
32
- text = text_tag.get_text(strip=True) if text_tag else None
33
-
34
- # Get the time inside <time>
35
- time_tag = soup.find("time")
36
- time = time_tag.get_text(strip=True) if time_tag else None
37
-
38
- return {
39
- "link": link,
40
- "time": time,
41
- "headline": headline,
42
- "text": text,
43
- }
44
- except Exception as e:
45
- print(f"Error parsing news item: {e}")
46
- raise
47
-
48
-
49
- # %%
50
- def search_news(keyword, page=1) -> list:
51
  """
52
- Fetch news articles related to a keyword from udn.com.
 
 
 
53
 
54
  Args:
55
- keyword: The search keyword for news articles.
56
- page: The page number to fetch (default is 1).
 
57
 
58
  Returns:
59
- A list of dictionaries containing link, time, headline and text of news article data.
60
-
61
- Raises:
62
- requests.RequestException: If there's an error fetching data from the URL.
63
- Exception: For other unexpected errors.
 
 
64
  """
65
- try:
66
- url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
67
- response = requests.get(url)
68
-
69
- if response.status_code != 200:
70
- raise requests.RequestException(f"Failed to retrieve data: {response.status_code}")
71
-
72
- soup = BeautifulSoup(response.text, 'html.parser')
73
- articles = soup.select('div > div > main > section > ul > li')
74
-
75
- results = []
76
- for article in articles:
77
- try:
78
- article_html = article.prettify()
79
- data = parse_news_item(article_html)
80
- # change dict to list
81
- data_list = list(data.values())
82
- results.append(data_list)
83
- except Exception as e:
84
- print(f"Error parsing article: {e}")
85
- continue
86
-
87
- return results
88
- except requests.RequestException as e:
89
- print(f"Network error in search_news: {e}")
90
- raise
91
- except Exception as e:
92
- print(f"Unexpected error in search_news: {e}")
93
- raise
94
-
95
- # search_news('台積電', 1) # Example usage to fetch news articles related to '台積電'
96
 
97
- # %%
98
- # write a function to get the url and parse the content
99
- def get_content(url) -> dict:
100
  """
101
- Fetch and parse the content of a given URL.
 
 
 
102
 
103
  Args:
104
- url: The URL to fetch and parse.
105
-
 
106
  Returns:
107
- A dictionary containing the title, text content, and HTML of the page.
108
-
109
- Raises:
110
- requests.RequestException: If there's an error fetching data from the URL.
111
- Exception: For other unexpected errors.
 
 
112
  """
113
- try:
114
- response = requests.get(url)
115
-
116
- if response.status_code != 200:
117
- raise requests.RequestException(f"Failed to retrieve {url}: {response.status_code}")
118
-
119
- soup = BeautifulSoup(response.text, 'html.parser')
120
-
121
- # using select to get the text inside the #article_body
122
- # This assumes the content is inside an element with id="article_body"
123
- article_body = soup.select_one('#article_body')
124
- text_content = ''
125
- if article_body:
126
- text_content = article_body.get_text(separator='\n', strip=True)
127
-
128
- return {
129
- 'link': url,
130
- 'title': soup.title.string if soup.title else 'No title',
131
- 'text': text_content
132
- }
133
- except requests.RequestException as e:
134
- print(f"Network error in get_content: {e}")
135
- raise
136
- except Exception as e:
137
- print(f"Unexpected error in get_content: {e}")
138
- raise
139
-
140
- # %%
141
- from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
142
- import os
143
-
144
- model_name = os.environ.get("AI_MODEL", "openrouter/qwen/qwen-2.5-coder-32b-instruct:free")
145
- model = LiteLLMModel(model_name, api_key=os.environ["OPENROUTER_API_KEY"])
146
- url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
147
- server_parameters = {"url": url, "transport": "sse"}
148
 
149
- def newsAgent(task: str) -> str:
150
  """
151
- News Agent to handle the news task.
 
 
152
 
153
  Args:
154
- task: The task description.
155
-
 
156
  Returns:
157
- The result of the Task.
158
-
159
- Raises:
160
- Exception: For errors during agent execution.
 
 
 
161
  """
162
- try:
163
- result = ""
164
- with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
165
- agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
166
- for event in agent.run(task, stream=True, max_steps=5):
167
- if isinstance(event, ActionStep):
168
- result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
169
- # yield result
170
- if isinstance(event, FinalAnswerStep):
171
- result += f"\n## ======Final======\n{event.output}"
172
- # yield result
173
- return result
174
- except Exception as e:
175
- error_msg = f"Error in newsAgent: {e}"
176
- print(error_msg)
177
- raise Exception(error_msg) from e
178
 
179
- # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result') # Example usage to fetch content from a specific URL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- # %%
182
- # using the gradio to create two tab
183
- # 1. search news
184
- # 2. get content from url
185
- def main():
186
- with gr.Blocks() as demo:
187
- gr.Markdown("# News Search and Content Fetcher")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
- with gr.Tab("Search News"):
190
- keyword = gr.Textbox(label="Keyword", placeholder="Enter keyword to search news")
191
- page = gr.Number(label="Page Number", value=1, step=1)
192
- search_button = gr.Button("Search")
193
- search_results = gr.DataFrame(label="Search Results", headers=["Link", "Time", "Headline", "Text"])
194
- # Examples for Search News tab
195
- gr.Examples(
196
- examples=[
197
- ["AI", 1],
198
- ["華碩", 2]
199
- ],
200
- inputs=[keyword, page],
201
- outputs=search_results,
202
- fn=search_news,
203
- cache_examples=False
204
- )
205
- search_button.click(search_news, inputs=[keyword, page], outputs=search_results)
206
-
207
 
208
- with gr.Tab("Get Content from URL"):
209
- url_input = gr.Textbox(label="URL", placeholder="Enter URL to fetch content")
210
- content_output = gr.JSON(label="Content Output")
211
- # Examples for Get Content of News tab
212
- gr.Examples(
213
- examples=[
214
- ["https://money.udn.com/money/story/5722/8870335?from=edn_search_result"],
215
- ["https://money.udn.com/money/story/5612/8868152?from=edn_search_result"]
216
- ],
217
- inputs=[url_input],
218
- outputs=content_output,
219
- fn=get_content,
220
- cache_examples=False
221
- )
222
- url_input.submit(get_content, inputs=url_input, outputs=content_output)
223
-
224
- with gr.Tab("News Agent"):
225
- agent_input = gr.Textbox(label="Task", placeholder="Enter the task")
226
- # run_button = gr.Button("Run")
227
- result_output = gr.Markdown(label="Result")
228
- # Examples for Get Content of News tab
229
- gr.Examples(
230
- examples=[
231
- ["華碩今日新聞"],
232
- ["華碩和Nvidia今日新聞"]
233
- ],
234
- inputs=[agent_input],
235
- outputs=result_output,
236
- fn=newsAgent,
237
- cache_examples=True
238
- )
239
- agent_input.submit(newsAgent, inputs=agent_input, outputs=result_output)
240
-
241
- demo.launch(mcp_server=True, server_name="0.0.0.0",allowed_paths=["/"], share=True)
242
-
243
  if __name__ == "__main__":
244
- main()
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import sys
4
+ import os
5
+
6
+ from utils.tools import get_kb, get_schema, get_tables, get_meaning
7
 
8
@gr.mcp.tool()
def get_all_databases() -> list:
    """Get all available database names from the schema file.

    Reads ./data/db_schema.csv and extracts the distinct database names.
    Only the 'db_name' column is loaded, so the (potentially large) schema
    and sample-data columns are never parsed.

    Returns:
        list: A sorted list of unique database names available in the system.

    Example:
        >>> databases = get_all_databases()
        >>> print(databases)
        ['db1', 'db2', 'db3']
    """
    # usecols keeps the read cheap; dropna() avoids a TypeError from
    # sorted() when a missing db_name is parsed as a float NaN and would
    # otherwise be compared against strings.
    schema_df = pd.read_csv("./data/db_schema.csv", usecols=["db_name"])
    return sorted(schema_df["db_name"].dropna().unique().tolist())
 
 
 
 
 
 
 
26
 
27
def kb_query(db_name, knowledge_keyword):
    """
    Query the knowledge base for a specific database with optional keyword filtering.

    Retrieves knowledge-base entries for the selected database; when a
    keyword is supplied the entries are filtered by it.

    Args:
        db_name (str): Name of the database to query. Must not be empty.
        knowledge_keyword (str): Optional keyword used to filter the results.
            An empty/None value returns every knowledge entry for the database.

    Returns:
        pandas.DataFrame: Matching knowledge-base rows, or a single-column
        DataFrame carrying an error message when no database was chosen or
        nothing matched.

    Example:
        >>> result = kb_query("sales_db", "customer")
        >>> print(result)
        # DataFrame with customer-related knowledge from sales_db
    """
    if not db_name:
        return pd.DataFrame({"message": ["請先選擇資料庫"]})

    # get_kb treats a falsy keyword as "no filter", so the two call shapes
    # below are equivalent to always filtering only when a keyword exists.
    rows = get_kb(db_name) if not knowledge_keyword else get_kb(db_name, knowledge_keyword)

    if len(rows) == 0:
        return pd.DataFrame({"message": ["沒有找到相關知識"]})
    return rows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
def schema_query(db_name, table_name):
    """
    Query the schema structure for a specific table in a database.

    Looks up the stored schema definition (column layout plus sample data)
    for one table of one database.

    Args:
        db_name (str): Database containing the table. Must not be empty.
        table_name (str): Table whose schema is requested. Must not be empty.

    Returns:
        pandas.DataFrame: Schema rows for the table, or a single-column
        DataFrame with an error message when a parameter is missing or
        nothing was found.

    Example:
        >>> result = schema_query("sales_db", "customers")
        >>> print(result)
        # DataFrame with column definitions for the customers table
    """
    # Guard clause: both selections are required before querying.
    if not (db_name and table_name):
        return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})

    rows = get_schema(db_name, table_name)
    return rows if len(rows) else pd.DataFrame({"message": ["沒有找到相關資料表結構"]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
def tables_query(db_name):
    """
    Get the list of all tables available in a specific database.

    Args:
        db_name (str): Database to list tables for. An empty/None value
            yields an empty list.

    Returns:
        list: Table names recorded for the database; empty when no
        database is selected or none are found.

    Example:
        >>> tables = tables_query("sales_db")
        >>> print(tables)
        ['customers', 'orders', 'products', 'inventory']
    """
    return get_tables(db_name) if db_name else []
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
def meaning_query(db_name, table_name):
    """
    Query the meaning and description of columns in a specific table.

    Fetches the per-column explanations stored for a table so users can
    understand the purpose and content of each field.

    Args:
        db_name (str): Database containing the table. Must not be empty.
        table_name (str): Table whose column meanings are requested.
            Must not be empty.

    Returns:
        pandas.DataFrame: Column-name/meaning rows, or a single-column
        DataFrame with an error message when a parameter is missing or
        nothing was found.

    Example:
        >>> result = meaning_query("sales_db", "customers")
        >>> print(result)
        # DataFrame with an explanation for each column of customers
    """
    # Guard clause: both selections are required before querying.
    if not (db_name and table_name):
        return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})

    rows = get_meaning(db_name, table_name)
    return rows if len(rows) else pd.DataFrame({"message": ["沒有找到相關欄位意義"]})
145
 
146
# Build the Gradio interface: one tab per query type.
with gr.Blocks(title="資料庫查詢工具") as demo:
    gr.Markdown("# 資料庫查詢工具")
    gr.Markdown("這個工具可以幫助您查詢資料庫的知識庫、資料表結構和欄位意義。")

    # All available databases, computed once at startup for every dropdown.
    all_dbs = get_all_databases()

    with gr.Tab("知識庫查詢"):
        with gr.Row():
            kb_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            kb_keyword = gr.Textbox(label="知識關鍵字 (可選)")
        kb_search = gr.Button("查詢知識庫")
        kb_result = gr.DataFrame(label="查詢結果")
        kb_search.click(kb_query, inputs=[kb_db, kb_keyword], outputs=kb_result)
        gr.api(get_all_databases)

    with gr.Tab("資料表查詢"):
        # Renamed from kb_* to tables_* — the original reused the first
        # tab's variable names, shadowing those widgets.
        with gr.Row():
            tables_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
        tables_search = gr.Button("查詢資料表")
        tables_result = gr.DataFrame(label="查詢結果")
        tables_search.click(tables_query, inputs=[tables_db], outputs=tables_result)

    with gr.Tab("資料表結構查詢"):
        with gr.Row():
            schema_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            schema_table = gr.Text(label="選擇資料表")
        schema_search = gr.Button("查詢資料表結構")
        schema_result = gr.DataFrame(label="查詢結果")

        # TODO: refresh the table field when the database selection changes
        # (an update_tables callback was sketched but never implemented).
        schema_search.click(schema_query, inputs=[schema_db, schema_table], outputs=schema_result)

    with gr.Tab("欄位意義查詢"):
        with gr.Row():
            meaning_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            meaning_table = gr.Text(label="選擇資料表")
        meaning_search = gr.Button("查詢欄位意義")
        meaning_result = gr.DataFrame(label="查詢結果")

        # TODO: refresh the table field when the database selection changes
        # (same missing update_tables callback as the schema tab).
        meaning_search.click(meaning_query, inputs=[meaning_db, meaning_table], outputs=meaning_result)

# Launch the Gradio application (also exposes the MCP server endpoint).
if __name__ == "__main__":
    # NOTE(review): allowed_paths=["/"] exposes the entire filesystem and
    # share=True opens a public tunnel — confirm both are intentional.
    demo.launch(mcp_server=True, server_name="0.0.0.0", allowed_paths=["/"], share=True)
data/column_meanings.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/db_schema.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/kb.csv ADDED
The diff for this file is too large to render. See raw diff
 
utils/__pycache__/tools.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
utils/tools.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd


# Knowledge-base entries, loaded once at import time.
kb_df = pd.read_csv("./data/kb.csv")


def get_kb(db_name, knowledge=None):
    """Return knowledge-base rows for *db_name*.

    Args:
        db_name: Database name to filter on.
        knowledge: Optional pattern matched against the 'knowledge' column
            (pandas ``str.contains`` semantics, i.e. treated as a regex).
            Falsy values ("" or None) disable the filter.

    Returns:
        pandas.DataFrame: Matching rows (possibly empty).
    """
    if not knowledge:
        return kb_df[kb_df['db_name'] == db_name]
    # na=False: rows whose 'knowledge' cell is missing count as non-matches
    # instead of injecting NaN into the boolean mask (which raises on
    # indexing).
    mask = (kb_df['db_name'] == db_name) & kb_df['knowledge'].str.contains(knowledge, na=False)
    return kb_df[mask]


# Table schemas, loaded once at import time.
schema_df = pd.read_csv("./data/db_schema.csv")


def get_schema(db_name, table_name):
    """Return the 'schema' and 'sample_data' columns for one table of one database."""
    result = schema_df[(schema_df['db_name'] == db_name) & (schema_df['table_name'] == table_name)]
    return result[['schema', 'sample_data']]


def get_tables(db_name):
    """Return the unique table names recorded for *db_name* (order of first appearance)."""
    rows = schema_df[schema_df['db_name'] == db_name]
    return rows.drop_duplicates(subset=['table_name'])['table_name'].to_list()


# Column meanings, loaded once at import time.
meaning_df = pd.read_csv("./data/column_meanings.csv")


def get_meaning(db_name, table_name):
    """Return column-name/meaning pairs for one table of one database."""
    result = meaning_df[(meaning_df['db_name'] == db_name) & (meaning_df['table_name'] == table_name)]
    return result[['column_name', 'meaning']]


if __name__ == "__main__":
    # Smoke-test calls. These previously ran unconditionally on import,
    # doing throwaway work for every importer; they now run only when the
    # module is executed directly.
    get_kb('solar', 'PP')
    get_schema('solar', 'alerts')
    get_tables('solar')
    get_meaning('solar', 'alerts')