Robin Chiu committed · 6185b4f · 1 Parent(s): b891a5d

init version

Browse files:
- app.py +131 -0
- pyproject.toml +10 -0
- requirements.txt +9 -0
- uv.lock +0 -0
app.py
ADDED
@@ -0,0 +1,131 @@
# %%
from bs4 import BeautifulSoup


def parse_news_item(html: str) -> dict:
    soup = BeautifulSoup(html, "html.parser")

    # Get the anchor tag containing the link
    link_tag = soup.find("a", href=True)
    link = link_tag["href"] if link_tag else None

    # Get the headline inside <h3>
    headline_tag = soup.find("h3", class_="story__headline")
    headline = headline_tag.get_text(strip=True) if headline_tag else None

    # Get the text inside <p>
    text_tag = soup.find("p", class_="story__text")
    text = text_tag.get_text(strip=True) if text_tag else None

    # Get the time inside <time>
    time_tag = soup.find("time")
    time = time_tag.get_text(strip=True) if time_tag else None

    return {
        "link": link,
        "time": time,
        "headline": headline,
        "text": text,
    }

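
# %%
# A minimal usage sketch for parse_news_item, assuming a single search-result
# <li> looks roughly like the markup below; the real money.udn.com markup may
# differ, in which case the selectors above would need adjusting.
sample_item = """
<li>
  <a href="https://money.udn.com/money/story/5612/0000000">
    <time>2025-06-10</time>
    <h3 class="story__headline">Example headline</h3>
    <p class="story__text">Example summary text</p>
  </a>
</li>
"""
# parse_news_item(sample_item)
# -> {'link': 'https://money.udn.com/money/story/5612/0000000', 'time': '2025-06-10',
#     'headline': 'Example headline', 'text': 'Example summary text'}
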
# %%
import requests
from bs4 import BeautifulSoup


def search_news(keyword, page=1):
    """
    Fetch news articles related to a keyword from udn.com.

    Args:
        keyword: The search keyword for news articles.
        page: The page number to fetch (default is 1).

    Returns:
        A list of [link, time, headline, text] rows, one per article.
    """
    url = f"https://money.udn.com/search/result/1001/{keyword}/{page}"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve data: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.select('div > div > main > section > ul > li')

    results = []
    for article in articles:
        article_html = article.prettify()
        data = parse_news_item(article_html)
        # Convert the dict to a list of values so it can be used as a table row
        data_list = list(data.values())
        results.append(data_list)

    return results


# search_news('台積電', 1)  # Example usage to fetch news articles related to '台積電'

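
# %%
# Sketch of the row shape search_news returns: list(data.values()) preserves the
# dict insertion order (link, time, headline, text), so each row lines up with the
# DataFrame headers ["Link", "Time", "Headline", "Text"] used in the Gradio tab
# below. The values shown are illustrative only.
# rows = search_news('台積電', 1)
# rows[0]
# -> ['https://money.udn.com/money/story/5612/0000000', '2025-06-10',
#     'Example headline', 'Example summary text']
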
# %%
# Function to fetch a URL and parse the article content
def get_content(url):
    """
    Fetch and parse the content of a given URL.

    Args:
        url: The URL to fetch and parse.

    Returns:
        A dictionary containing the link, title, and text content of the page,
        or None if the request fails.
    """
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve {url}: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Use a CSS selector to get the text inside #article_body.
    # This assumes the article content is inside an element with id="article_body".
    article_body = soup.select_one('#article_body')
    text_content = ''
    if article_body:
        text_content = article_body.get_text(separator='\n', strip=True)

    return {
        'link': url,
        'title': soup.title.string if soup.title else 'No title',
        'text': text_content
    }


# get_content('https://money.udn.com/money/story/5612/8832289?from=edn_search_result')  # Example usage to fetch content from a specific URL

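
# %%
# Sketch of chaining the two helpers: take the link from the first search hit and
# fetch its article body. Kept commented out, like the examples above, so the
# module has no network side effects; it assumes the search returns at least one row.
# first_link = search_news('台積電', 1)[0][0]  # first row, 'link' column
# article = get_content(first_link)
# print(article['title'])
# print(article['text'][:200])
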
# %%
# Use Gradio to create two tabs:
# 1. search news
# 2. get content from a URL
import gradio as gr


def main():
    with gr.Blocks() as demo:
        gr.Markdown("# News Search and Content Fetcher")

        with gr.Tab("Search News"):
            keyword = gr.Textbox(label="Keyword", placeholder="Enter keyword to search news")
            page = gr.Number(label="Page Number", value=1, step=1)
            search_button = gr.Button("Search")
            search_results = gr.DataFrame(label="Search Results", headers=["Link", "Time", "Headline", "Text"])

            search_button.click(search_news, inputs=[keyword, page], outputs=search_results)

        with gr.Tab("Get Content from URL"):
            url_input = gr.Textbox(label="URL", placeholder="Enter URL to fetch content")
            content_output = gr.JSON(label="Content Output")

            url_input.submit(get_content, inputs=url_input, outputs=content_output)

    demo.launch(mcp_server=True, server_name="0.0.0.0", allowed_paths=["/"])


if __name__ == "__main__":
    main()
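
# %%
# Sketch of calling the launched app programmatically with gradio_client (installed
# alongside gradio). The endpoint names are assumed to follow Gradio's default of
# naming API endpoints after the wired functions; the running app's "Use via API"
# page shows the actual names. Commented out so it never runs at import time.
# from gradio_client import Client
# client = Client("http://localhost:7860/")
# rows = client.predict("台積電", 1, api_name="/search_news")
# article = client.predict("https://money.udn.com/money/story/5612/8832289", api_name="/get_content")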
pyproject.toml
ADDED
@@ -0,0 +1,10 @@
[project]
name = "newsagent"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
    "bs4>=0.0.2",
    "gradio[mcp]>=5.33.1",
]
requirements.txt
ADDED
@@ -0,0 +1,9 @@
duckduckgo_search
smolagents
gradio[mcp]
datasets
langchain
langchain-chroma
langchain-text-splitters
langchain-community
sentence-transformers
uv.lock
ADDED
The diff for this file is too large to render.