Robin Chiu committed on
Commit
ed2fe48
·
1 Parent(s): 07cc8e5

add the data and utils.

Browse files
app.py CHANGED
@@ -1,244 +1,193 @@
1
- # %%
2
- import requests
3
- from bs4 import BeautifulSoup
4
  import gradio as gr
 
 
 
 
 
5
 
6
- def parse_news_item(html: str) -> dict:
 
7
  """
8
- Parse HTML of a news item to extract link, time, headline, and text.
 
 
9
 
10
- Args:
11
- html: The HTML string of a news item.
12
-
13
  Returns:
14
- A dictionary containing link, time, headline, and text.
15
-
16
- Raises:
17
- Exception: For parsing errors or other unexpected errors.
 
 
18
  """
19
- try:
20
- soup = BeautifulSoup(html, "html.parser")
21
-
22
- # Get the anchor tag containing the link
23
- link_tag = soup.find("a", href=True)
24
- link = link_tag["href"] if link_tag else None
25
-
26
- # Get the headline inside <h3>
27
- headline_tag = soup.find("h3", class_="story__headline")
28
- headline = headline_tag.get_text(strip=True) if headline_tag else None
29
 
30
- # Get the text inside <p>
31
- text_tag = soup.find("p", class_="story__text")
32
- text = text_tag.get_text(strip=True) if text_tag else None
33
-
34
- # Get the time inside <time>
35
- time_tag = soup.find("time")
36
- time = time_tag.get_text(strip=True) if time_tag else None
37
-
38
- return {
39
- "link": link,
40
- "time": time,
41
- "headline": headline,
42
- "text": text,
43
- }
44
- except Exception as e:
45
- print(f"Error parsing news item: {e}")
46
- raise
47
-
48
-
49
- # %%
50
- def search_news(keyword, page=1) -> list:
51
  """
52
- Fetch news articles related to a keyword from udn.com.
 
 
 
53
 
54
  Args:
55
- keyword: The search keyword for news articles.
56
- page: The page number to fetch (default is 1).
 
57
 
58
  Returns:
59
- A list of dictionaries containing link, time, headline and text of news article data.
60
-
61
- Raises:
62
- requests.RequestException: If there's an error fetching data from the URL.
63
- Exception: For other unexpected errors.
 
 
64
  """
65
- try:
66
- url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
67
- response = requests.get(url)
68
-
69
- if response.status_code != 200:
70
- raise requests.RequestException(f"Failed to retrieve data: {response.status_code}")
71
-
72
- soup = BeautifulSoup(response.text, 'html.parser')
73
- articles = soup.select('div > div > main > section > ul > li')
74
-
75
- results = []
76
- for article in articles:
77
- try:
78
- article_html = article.prettify()
79
- data = parse_news_item(article_html)
80
- # change dict to list
81
- data_list = list(data.values())
82
- results.append(data_list)
83
- except Exception as e:
84
- print(f"Error parsing article: {e}")
85
- continue
86
-
87
- return results
88
- except requests.RequestException as e:
89
- print(f"Network error in search_news: {e}")
90
- raise
91
- except Exception as e:
92
- print(f"Unexpected error in search_news: {e}")
93
- raise
94
-
95
- # search_news('台積電', 1) # Example usage to fetch news articles related to '台積電'
96
 
97
- # %%
98
- # write a function to get the url and parse the content
99
- def get_content(url) -> dict:
100
  """
101
- Fetch and parse the content of a given URL.
 
 
 
102
 
103
  Args:
104
- url: The URL to fetch and parse.
105
-
 
106
  Returns:
107
- A dictionary containing the title, text content, and HTML of the page.
108
-
109
- Raises:
110
- requests.RequestException: If there's an error fetching data from the URL.
111
- Exception: For other unexpected errors.
 
 
112
  """
113
- try:
114
- response = requests.get(url)
115
-
116
- if response.status_code != 200:
117
- raise requests.RequestException(f"Failed to retrieve {url}: {response.status_code}")
118
-
119
- soup = BeautifulSoup(response.text, 'html.parser')
120
-
121
- # using select to get the text inside the #article_body
122
- # This assumes the content is inside an element with id="article_body"
123
- article_body = soup.select_one('#article_body')
124
- text_content = ''
125
- if article_body:
126
- text_content = article_body.get_text(separator='\n', strip=True)
127
-
128
- return {
129
- 'link': url,
130
- 'title': soup.title.string if soup.title else 'No title',
131
- 'text': text_content
132
- }
133
- except requests.RequestException as e:
134
- print(f"Network error in get_content: {e}")
135
- raise
136
- except Exception as e:
137
- print(f"Unexpected error in get_content: {e}")
138
- raise
139
-
140
- # %%
141
- from smolagents import Tool, CodeAgent, LiteLLMModel, ToolCollection, ActionStep, FinalAnswerStep
142
- import os
143
-
144
- model_name = os.environ.get("AI_MODEL", "openrouter/qwen/qwen-2.5-coder-32b-instruct:free")
145
- model = LiteLLMModel(model_name, api_key=os.environ["OPENROUTER_API_KEY"])
146
- url = "https://robin0307-newsmcp.hf.space/gradio_api/mcp/sse"
147
- server_parameters = {"url": url, "transport": "sse"}
148
 
149
- def newsAgent(task: str) -> str:
150
  """
151
- News Agent to handle the news task.
 
 
152
 
153
  Args:
154
- task: The task description.
155
-
 
156
  Returns:
157
- The result of the Task.
158
-
159
- Raises:
160
- Exception: For errors during agent execution.
 
 
 
161
  """
162
- try:
163
- result = ""
164
- with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as mcp_tools:
165
- agent = CodeAgent(tools=[*mcp_tools.tools[:2]], model=model)
166
- for event in agent.run(task, stream=True, max_steps=5):
167
- if isinstance(event, ActionStep):
168
- result += f"\n## ======Step {event.step_number}======\n### Action\n```python\n{event.code_action}\n```\n### Observation\n{event.observations}"
169
- # yield result
170
- if isinstance(event, FinalAnswerStep):
171
- result += f"\n## ======Final======\n{event.output}"
172
- # yield result
173
- return result
174
- except Exception as e:
175
- error_msg = f"Error in newsAgent: {e}"
176
- print(error_msg)
177
- raise Exception(error_msg) from e
178
 
179
- # get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result') # Example usage to fetch content from a specific URL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- # %%
182
- # using the gradio to create two tab
183
- # 1. search news
184
- # 2. get content from url
185
- def main():
186
- with gr.Blocks() as demo:
187
- gr.Markdown("# News Search and Content Fetcher")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
- with gr.Tab("Search News"):
190
- keyword = gr.Textbox(label="Keyword", placeholder="Enter keyword to search news")
191
- page = gr.Number(label="Page Number", value=1, step=1)
192
- search_button = gr.Button("Search")
193
- search_results = gr.DataFrame(label="Search Results", headers=["Link", "Time", "Headline", "Text"])
194
- # Examples for Search News tab
195
- gr.Examples(
196
- examples=[
197
- ["AI", 1],
198
- ["華碩", 2]
199
- ],
200
- inputs=[keyword, page],
201
- outputs=search_results,
202
- fn=search_news,
203
- cache_examples=False
204
- )
205
- search_button.click(search_news, inputs=[keyword, page], outputs=search_results)
206
-
207
 
208
- with gr.Tab("Get Content from URL"):
209
- url_input = gr.Textbox(label="URL", placeholder="Enter URL to fetch content")
210
- content_output = gr.JSON(label="Content Output")
211
- # Examples for Get Content of News tab
212
- gr.Examples(
213
- examples=[
214
- ["https://money.udn.com/money/story/5722/8870335?from=edn_search_result"],
215
- ["https://money.udn.com/money/story/5612/8868152?from=edn_search_result"]
216
- ],
217
- inputs=[url_input],
218
- outputs=content_output,
219
- fn=get_content,
220
- cache_examples=False
221
- )
222
- url_input.submit(get_content, inputs=url_input, outputs=content_output)
223
-
224
- with gr.Tab("News Agent"):
225
- agent_input = gr.Textbox(label="Task", placeholder="Enter the task")
226
- # run_button = gr.Button("Run")
227
- result_output = gr.Markdown(label="Result")
228
- # Examples for Get Content of News tab
229
- gr.Examples(
230
- examples=[
231
- ["華碩今日新聞"],
232
- ["華碩和Nvidia今日新聞"]
233
- ],
234
- inputs=[agent_input],
235
- outputs=result_output,
236
- fn=newsAgent,
237
- cache_examples=True
238
- )
239
- agent_input.submit(newsAgent, inputs=agent_input, outputs=result_output)
240
-
241
- demo.launch(mcp_server=True, server_name="0.0.0.0",allowed_paths=["/"], share=True)
242
-
243
  if __name__ == "__main__":
244
- main()
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import sys
4
+ import os
5
+
6
+ from utils.tools import get_kb, get_schema, get_tables, get_meaning
7
 
8
@gr.mcp.tool()
def get_all_databases() -> list:
    """Get all available database names from the schema file.

    Reads ./data/db_schema.csv and extracts the distinct database names.
    Only the 'db_name' column is loaded, so the (potentially large) schema
    and sample-data columns are never parsed.

    Returns:
        list: A sorted list of unique database names available in the system.

    Example:
        >>> databases = get_all_databases()
        >>> print(databases)
        ['db1', 'db2', 'db3']
    """
    # usecols keeps the read cheap; dropna() avoids a TypeError from
    # sorted() when a missing db_name is parsed as a float NaN and would
    # otherwise be compared against strings.
    schema_df = pd.read_csv("./data/db_schema.csv", usecols=["db_name"])
    return sorted(schema_df["db_name"].dropna().unique().tolist())
 
 
 
 
 
 
 
26
 
27
def kb_query(db_name, knowledge_keyword):
    """
    Query the knowledge base for a specific database with optional keyword filtering.

    Retrieves knowledge-base entries for the selected database; when a
    keyword is supplied the entries are filtered by it.

    Args:
        db_name (str): Name of the database to query. Must not be empty.
        knowledge_keyword (str): Optional keyword used to filter the results.
            An empty/None value returns every knowledge entry for the database.

    Returns:
        pandas.DataFrame: Matching knowledge-base rows, or a single-column
        DataFrame carrying an error message when no database was chosen or
        nothing matched.

    Example:
        >>> result = kb_query("sales_db", "customer")
        >>> print(result)
        # DataFrame with customer-related knowledge from sales_db
    """
    if not db_name:
        return pd.DataFrame({"message": ["請先選擇資料庫"]})

    # get_kb treats a falsy keyword as "no filter", so the two call shapes
    # below are equivalent to always filtering only when a keyword exists.
    rows = get_kb(db_name) if not knowledge_keyword else get_kb(db_name, knowledge_keyword)

    if len(rows) == 0:
        return pd.DataFrame({"message": ["沒有找到相關知識"]})
    return rows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
def schema_query(db_name, table_name):
    """
    Query the schema structure for a specific table in a database.

    Looks up the stored schema definition (column layout plus sample data)
    for one table of one database.

    Args:
        db_name (str): Database containing the table. Must not be empty.
        table_name (str): Table whose schema is requested. Must not be empty.

    Returns:
        pandas.DataFrame: Schema rows for the table, or a single-column
        DataFrame with an error message when a parameter is missing or
        nothing was found.

    Example:
        >>> result = schema_query("sales_db", "customers")
        >>> print(result)
        # DataFrame with column definitions for the customers table
    """
    # Guard clause: both selections are required before querying.
    if not (db_name and table_name):
        return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})

    rows = get_schema(db_name, table_name)
    return rows if len(rows) else pd.DataFrame({"message": ["沒有找到相關資料表結構"]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
def tables_query(db_name):
    """
    Get the list of all tables available in a specific database.

    Args:
        db_name (str): Database to list tables for. An empty/None value
            yields an empty list.

    Returns:
        list: Table names recorded for the database; empty when no
        database is selected or none are found.

    Example:
        >>> tables = tables_query("sales_db")
        >>> print(tables)
        ['customers', 'orders', 'products', 'inventory']
    """
    return get_tables(db_name) if db_name else []
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
def meaning_query(db_name, table_name):
    """
    Query the meaning and description of columns in a specific table.

    Fetches the per-column explanations stored for a table so users can
    understand the purpose and content of each field.

    Args:
        db_name (str): Database containing the table. Must not be empty.
        table_name (str): Table whose column meanings are requested.
            Must not be empty.

    Returns:
        pandas.DataFrame: Column-name/meaning rows, or a single-column
        DataFrame with an error message when a parameter is missing or
        nothing was found.

    Example:
        >>> result = meaning_query("sales_db", "customers")
        >>> print(result)
        # DataFrame with an explanation for each column of customers
    """
    # Guard clause: both selections are required before querying.
    if not (db_name and table_name):
        return pd.DataFrame({"message": ["請選擇資料庫和資料表"]})

    rows = get_meaning(db_name, table_name)
    return rows if len(rows) else pd.DataFrame({"message": ["沒有找到相關欄位意義"]})
145
 
146
# Build the Gradio interface: one tab per query type.
with gr.Blocks(title="資料庫查詢工具") as demo:
    gr.Markdown("# 資料庫查詢工具")
    gr.Markdown("這個工具可以幫助您查詢資料庫的知識庫、資料表結構和欄位意義。")

    # All available databases, computed once at startup for every dropdown.
    all_dbs = get_all_databases()

    with gr.Tab("知識庫查詢"):
        with gr.Row():
            kb_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            kb_keyword = gr.Textbox(label="知識關鍵字 (可選)")
        kb_search = gr.Button("查詢知識庫")
        kb_result = gr.DataFrame(label="查詢結果")
        kb_search.click(kb_query, inputs=[kb_db, kb_keyword], outputs=kb_result)
        gr.api(get_all_databases)

    with gr.Tab("資料表查詢"):
        # Renamed from kb_* to tables_* — the original reused the first
        # tab's variable names, shadowing those widgets.
        with gr.Row():
            tables_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
        tables_search = gr.Button("查詢資料表")
        tables_result = gr.DataFrame(label="查詢結果")
        tables_search.click(tables_query, inputs=[tables_db], outputs=tables_result)

    with gr.Tab("資料表結構查詢"):
        with gr.Row():
            schema_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            schema_table = gr.Text(label="選擇資料表")
        schema_search = gr.Button("查詢資料表結構")
        schema_result = gr.DataFrame(label="查詢結果")

        # TODO: refresh the table field when the database selection changes
        # (an update_tables callback was sketched but never implemented).
        schema_search.click(schema_query, inputs=[schema_db, schema_table], outputs=schema_result)

    with gr.Tab("欄位意義查詢"):
        with gr.Row():
            meaning_db = gr.Dropdown(choices=all_dbs, label="選擇資料庫", value=all_dbs[0] if all_dbs else None)
            meaning_table = gr.Text(label="選擇資料表")
        meaning_search = gr.Button("查詢欄位意義")
        meaning_result = gr.DataFrame(label="查詢結果")

        # TODO: refresh the table field when the database selection changes
        # (same missing update_tables callback as the schema tab).
        meaning_search.click(meaning_query, inputs=[meaning_db, meaning_table], outputs=meaning_result)

# Launch the Gradio application (also exposes the MCP server endpoint).
if __name__ == "__main__":
    # NOTE(review): allowed_paths=["/"] exposes the entire filesystem and
    # share=True opens a public tunnel — confirm both are intentional.
    demo.launch(mcp_server=True, server_name="0.0.0.0", allowed_paths=["/"], share=True)
data/column_meanings.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/db_schema.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/kb.csv ADDED
The diff for this file is too large to render. See raw diff
 
utils/__pycache__/tools.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
utils/tools.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd


# Knowledge-base entries, loaded once at import time.
kb_df = pd.read_csv("./data/kb.csv")


def get_kb(db_name, knowledge=None):
    """Return knowledge-base rows for *db_name*.

    Args:
        db_name: Database name to filter on.
        knowledge: Optional pattern matched against the 'knowledge' column
            (pandas ``str.contains`` semantics, i.e. treated as a regex).
            Falsy values ("" or None) disable the filter.

    Returns:
        pandas.DataFrame: Matching rows (possibly empty).
    """
    if not knowledge:
        return kb_df[kb_df['db_name'] == db_name]
    # na=False: rows whose 'knowledge' cell is missing count as non-matches
    # instead of injecting NaN into the boolean mask (which raises on
    # indexing).
    mask = (kb_df['db_name'] == db_name) & kb_df['knowledge'].str.contains(knowledge, na=False)
    return kb_df[mask]


# Table schemas, loaded once at import time.
schema_df = pd.read_csv("./data/db_schema.csv")


def get_schema(db_name, table_name):
    """Return the 'schema' and 'sample_data' columns for one table of one database."""
    result = schema_df[(schema_df['db_name'] == db_name) & (schema_df['table_name'] == table_name)]
    return result[['schema', 'sample_data']]


def get_tables(db_name):
    """Return the unique table names recorded for *db_name* (order of first appearance)."""
    rows = schema_df[schema_df['db_name'] == db_name]
    return rows.drop_duplicates(subset=['table_name'])['table_name'].to_list()


# Column meanings, loaded once at import time.
meaning_df = pd.read_csv("./data/column_meanings.csv")


def get_meaning(db_name, table_name):
    """Return column-name/meaning pairs for one table of one database."""
    result = meaning_df[(meaning_df['db_name'] == db_name) & (meaning_df['table_name'] == table_name)]
    return result[['column_name', 'meaning']]


if __name__ == "__main__":
    # Smoke-test calls. These previously ran unconditionally on import,
    # doing throwaway work for every importer; they now run only when the
    # module is executed directly.
    get_kb('solar', 'PP')
    get_schema('solar', 'alerts')
    get_tables('solar')
    get_meaning('solar', 'alerts')