File size: 2,789 Bytes
5b6e532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac4b1e4
5b6e532
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import re
import os
import html
import httpx
from bs4 import BeautifulSoup
from mcp.server.fastmcp import FastMCP, Context
import aiofiles
import mimetypes
import gradio as gr


def utf8_clean(text: str) -> str:
    """Unescape HTML entities in *text* and strip surrounding whitespace.

    Accepts None and returns "" in that case, because callers pass
    BeautifulSoup attribute lookups (e.g. ``link_tag.get('href')``)
    which yield None when the attribute is missing — previously this
    raised TypeError inside ``html.unescape``.
    """
    if text is None:
        return ""
    return html.unescape(text).strip()

async def duckduckgo_detailed_search(query: str, max_results: int = 10) -> str:
    """Perform a detailed DuckDuckGo search and return results as Markdown.

    Posts the query to DuckDuckGo's HTML endpoint, scrapes up to
    *max_results* result blocks, and renders each as a Markdown section
    with title, URL, and excerpt.

    Args:
        query: Search terms to submit.
        max_results: Maximum number of results to include (default 10).

    Returns:
        A Markdown string; a "_No results found._" section when empty.

    Raises:
        httpx.HTTPStatusError: If DuckDuckGo responds with an error status.
        httpx.TimeoutException: If the request exceeds the timeout.
    """
    url = "https://html.duckduckgo.com/html/"
    data = {'q': query}
    headers = {'User-Agent': 'Mozilla/5.0'}

    # Bounded timeout so a slow/unreachable endpoint cannot hang the caller;
    # raise_for_status() prevents silently scraping an HTTP error page.
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(url, data=data, headers=headers)
        response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    results = []

    for result in soup.find_all('div', class_='result'):
        if len(results) >= max_results:
            break

        link_tag = result.find('a', class_='result__a')
        # Snippet markup varies: usually an <a class="result__snippet">,
        # sometimes a <div> with the same class — try both.
        snippet_tag = result.find('a', class_='result__snippet')
        snippet_fallback = result.find('div', class_='result__snippet')

        if link_tag:
            title = utf8_clean(link_tag.get_text())
            link = utf8_clean(link_tag.get('href'))
            snippet = utf8_clean(snippet_tag.get_text()) if snippet_tag else (
                utf8_clean(snippet_fallback.get_text()) if snippet_fallback else "No excerpt available.")

            results.append({'title': title, 'url': link, 'excerpt': snippet})

    if not results:
        return "## Search Results\n\n_No results found._"

    md = ["## Search Results\n"]
    for r in results:
        md.append(f"### \"{r['title']}\"\n**URL:** {r['url']}  \n**Excerpt:** {r['excerpt']}\n")

    return "\n".join(md)

# Blocking facade over the async search, since Gradio callbacks are sync.
def duckduckgo_sync(query: str) -> str:
    """Run the async DuckDuckGo search to completion and return its Markdown."""
    from asyncio import run as run_coroutine

    return run_coroutine(duckduckgo_detailed_search(query))

async def semantic_search(query: str, limit: int = 5) -> dict:
    """Perform a semantic content search via the external retrieval API.

    Args:
        query: Free-text query for the semantic search.
        limit: Maximum number of results to request (default 5).

    Returns:
        The decoded JSON response body as a dict.

    Raises:
        httpx.HTTPStatusError: If the service responds with an error status.
        httpx.TimeoutException: If the request exceeds the timeout.
    """
    url = "https://content_retrival.cfapps.eu10.hana.ondemand.com/search"
    params = {
        "search_type": "semantic",
        "query": query,
        "limit": limit
    }
    headers = {"Accept": "application/json"}

    # SECURITY: verify=False disables TLS certificate validation and permits
    # man-in-the-middle attacks. Preserved here to keep current behavior
    # against this endpoint — fix the server certificate and remove it.
    # Timeout bounds the call; raise_for_status() avoids JSON-decoding an
    # HTML/plaintext error body on non-2xx responses.
    async with httpx.AsyncClient(verify=False, timeout=30.0) as client:
        response = await client.get(url, params=params, headers=headers)
        response.raise_for_status()
        return response.json()


# Create a Gradio Interface
# Single text-in/text-out UI wired to the blocking search wrapper.
demo = gr.Interface(
    fn=duckduckgo_sync,
    inputs="text",
    outputs="text",
    title="DuckDuckGo Search",
    description="Performs a DuckDuckGo search and returns top results."
)

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (container/cloud friendly)
        server_port=7860,  # Gradio's conventional default port
        mcp_server=True  # presumably exposes the app as an MCP server (Gradio 5+ feature) — confirm installed gradio version supports this kwarg
    )