# Hugging Face Space: DuckDuckGo search tool (Gradio UI, launched as an MCP server)
| import re | |
| import os | |
| import html | |
| import httpx | |
| from bs4 import BeautifulSoup | |
| from mcp.server.fastmcp import FastMCP, Context | |
| import aiofiles | |
| import mimetypes | |
| import gradio as gr | |
def utf8_clean(text: str) -> str:
    """Unescape HTML entities in *text* and strip surrounding whitespace."""
    unescaped = html.unescape(text)
    return unescaped.strip()
async def duckduckgo_detailed_search(query: str, max_results: int = 10) -> str:
    """Run a DuckDuckGo HTML search and return the top results as Markdown.

    Args:
        query: Search terms to submit.
        max_results: Maximum number of results to include (default 10).

    Returns:
        A Markdown string: a "## Search Results" heading followed by one
        "###" section per result (title, URL, excerpt), or a placeholder
        section when no results were parsed.

    Raises:
        httpx.HTTPStatusError: If DuckDuckGo responds with a 4xx/5xx status.
        httpx.TimeoutException: If the request exceeds the 15s timeout.
    """
    url = "https://html.duckduckgo.com/html/"
    data = {'q': query}
    # Browser-like User-Agent: DuckDuckGo blocks obvious bot clients.
    headers = {'User-Agent': 'Mozilla/5.0'}
    async with httpx.AsyncClient(timeout=15.0) as client:
        response = await client.post(url, data=data, headers=headers)
        # Fail fast on HTTP errors instead of scraping an error page.
        response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    results = []
    for result in soup.find_all('div', class_='result'):
        if len(results) >= max_results:
            break
        link_tag = result.find('a', class_='result__a')
        # The snippet is usually an <a>, but some layouts render a <div>.
        snippet_tag = result.find('a', class_='result__snippet')
        snippet_fallback = result.find('div', class_='result__snippet')
        # Skip malformed entries: no anchor, or an anchor without an href.
        # (The original passed None into utf8_clean and crashed on
        # html.unescape(None) for href-less anchors.)
        if not link_tag or not link_tag.get('href'):
            continue
        title = utf8_clean(link_tag.get_text())
        link = utf8_clean(link_tag.get('href'))
        if snippet_tag:
            snippet = utf8_clean(snippet_tag.get_text())
        elif snippet_fallback:
            snippet = utf8_clean(snippet_fallback.get_text())
        else:
            snippet = "No excerpt available."
        results.append({'title': title, 'url': link, 'excerpt': snippet})
    if not results:
        return "## Search Results\n\n_No results found._"
    md = ["## Search Results\n"]
    for r in results:
        md.append(f"### \"{r['title']}\"\n**URL:** {r['url']} \n**Excerpt:** {r['excerpt']}\n")
    return "\n".join(md)
# Synchronous entry point for Gradio, which calls plain functions.
def duckduckgo_sync(query: str) -> str:
    """Blocking wrapper that drives the async DuckDuckGo search to completion."""
    import asyncio
    coroutine = duckduckgo_detailed_search(query)
    return asyncio.run(coroutine)
async def semantic_search(query: str, limit: int = 5) -> dict:
    """Query the external content-retrieval service's semantic search API.

    Args:
        query: Free-text search query.
        limit: Maximum number of results requested (default 5).

    Returns:
        The service's JSON response payload, decoded into a dict.

    Raises:
        httpx.HTTPStatusError: If the service responds with a 4xx/5xx status.
        httpx.TimeoutException: If the request exceeds the 15s timeout.
    """
    url = "https://content_retrival.cfapps.eu10.hana.ondemand.com/search"
    params = {
        "search_type": "semantic",
        "query": query,
        "limit": limit,
    }
    headers = {"Accept": "application/json"}
    # SECURITY: verify=False disables TLS certificate validation, exposing the
    # call to man-in-the-middle attacks. Kept for compatibility with the
    # endpoint's certificate — the proper fix is a valid cert or a pinned CA.
    async with httpx.AsyncClient(verify=False, timeout=15.0) as client:
        response = await client.get(url, params=params, headers=headers)
        # Surface HTTP failures explicitly rather than letting .json() blow up
        # on a non-JSON error body.
        response.raise_for_status()
        return response.json()
# Create a Gradio Interface
# Single text box in, Markdown-formatted search results out.
demo = gr.Interface(
    fn=duckduckgo_sync,  # blocking wrapper; Gradio invokes it once per request
    inputs="text",
    outputs="text",
    title="DuckDuckGo Search",
    description="Performs a DuckDuckGo search and returns top results."
)
if __name__ == "__main__":
    # Bind to all interfaces on 7860 — required for containerized hosts
    # (e.g. Hugging Face Spaces) to reach the server.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        # NOTE(review): presumably exposes the app's functions over MCP;
        # requires a Gradio version with MCP support (gradio[mcp]) — confirm.
        mcp_server=True
    )