rkihacker committed
Commit 2b7ed67 · verified · 1 Parent(s): ef5f360

Update main.py

Files changed (1):
  1. main.py +73 -204

main.py CHANGED
@@ -1,209 +1,78 @@
-from typing import List, Optional, Set
-from urllib.parse import parse_qs, unquote, urlparse
-
-import httpx
-from bs4 import BeautifulSoup
-from fastapi import FastAPI, HTTPException, Query
-
-UPSTREAM_URL = "https://lite.duckduckgo.com/lite/"
-DEFAULT_KL = "wt-wt"
-MAX_PAGES = 5
-PAGE_SIZE_HINT = 30
-MAX_RESULTS = MAX_PAGES * PAGE_SIZE_HINT
-
-app = FastAPI(
-    title="DuckDuckGo Lite Web Search Proxy",
-    description=(
-        "A tiny FastAPI wrapper that proxies search queries to DuckDuckGo Lite and returns"
-        " structured JSON results."
-    ),
-    version="1.0.0",
 )

-
-def _extract_results(html: str) -> List[dict]:
-    """Parse DuckDuckGo Lite HTML into a list of search results."""
-    soup = BeautifulSoup(html, "html.parser")
-    results: List[dict] = []
-
-    for table in soup.select("table.result"):
-        link_tag = table.select_one("td.result-link a, a.result-link")
-        if not link_tag:
-            continue
-
-        title = link_tag.get_text(strip=True)
-        url = _normalize_url(link_tag.get("href"))
-        if not title or not url:
-            continue
-
-        snippet = _extract_snippet_text(table, link_tag)
-
-        results.append({
-            "title": title,
-            "url": url,
-            "snippet": snippet,
-        })
-
-    if not results:
-        # As a fallback, try to find plain links if the expected structure changes.
-        for link_tag in soup.select("a.result-link"):
-            title = link_tag.get_text(strip=True)
-            url = _normalize_url(link_tag.get("href"))
-            if not title or not url:
-                continue
-            snippet = _extract_snippet_text(link_tag.find_parent("table") or soup, link_tag)
-            results.append({
-                "title": title,
-                "url": url,
-                "snippet": snippet,
-            })
-
-    return results
-
-
-def _extract_snippet_text(container, link_tag) -> Optional[str]:
-    """Best effort extraction of result snippet text."""
-    if not container:
-        return None
-
-    def _clean_text(tag) -> Optional[str]:
-        if not tag:
-            return None
-        text = tag.get_text(" ", strip=True)
-        return text or None
-
-    # Prefer rows that follow the link row inside the same table.
-    link_row = link_tag.find_parent("tr")
-    if link_row:
-        for sibling_row in link_row.find_next_siblings("tr"):
-            candidate = sibling_row.find("td") or sibling_row.find("div")
-            if not candidate:
-                continue
-            classes = {cls.lower() for cls in candidate.get("class", [])}
-            if not candidate.get_text(strip=True):
-                continue
-            if "result-snippet" in classes or any("snippet" in cls for cls in classes) or not candidate.find("a"):
-                text = _clean_text(candidate)
-                if text:
-                    return text
-
-    # Fallback: look for known snippet containers within the table.
-    for candidate in container.select("td.result-snippet, div.result-snippet"):
-        text = _clean_text(candidate)
-        if text:
-            return text
-
-    return None
-
-
-def _normalize_url(href: Optional[str]) -> Optional[str]:
-    """Convert protocol-relative and redirect URLs to absolute targets."""
-    if not href:
-        return None
-
-    href = href.strip()
-    if href.startswith("//"):
-        href = f"https:{href}"
-
-    parsed = urlparse(href)
-    if (
-        parsed.netloc.endswith("duckduckgo.com")
-        and parsed.path.startswith("/l")
-    ):
-        query = parse_qs(parsed.query)
-        uddg = query.get("uddg", [])
-        if uddg:
-            return unquote(uddg[0])
-
-    return href
-
-
-async def _collect_results(
-    client: httpx.AsyncClient,
-    base_params: dict,
-    headers: dict,
-    limit: Optional[int],
-) -> List[dict]:
-    collected: List[dict] = []
-    seen_urls: Set[str] = set()
-    offset = int(base_params.get("s", "0") or 0)
-    pages_fetched = 0
-
-    while True:
-        page_params = dict(base_params)
-        page_params["s"] = str(offset)
-        response = await client.get(UPSTREAM_URL, params=page_params, headers=headers)
-        response.raise_for_status()
-        page_results = _extract_results(response.text)
-
-        if not page_results:
-            break
-
-        for item in page_results:
-            url = item.get("url")
-            if url and url in seen_urls:
-                continue
-            if url:
-                seen_urls.add(url)
-            collected.append(item)
-            if limit and len(collected) >= limit:
-                return collected[:limit]
-
-        pages_fetched += 1
-        if limit is None or pages_fetched >= MAX_PAGES:
-            break
-
-        offset += len(page_results) or PAGE_SIZE_HINT
-
-    return collected
-
-
-@app.post("/lite/")
-async def search_duckduckgo_lite(
-    q: str = Query(..., description="keywords for query", min_length=1),
-    s: Optional[int] = Query(None, description="can be `0`"),
-    o: Optional[str] = Query(None, description="can be `json`"),
-    api: Optional[str] = Query(None, description="can be `d.js`"),
-    kl: Optional[str] = Query(None, description="market/locale code"),
-    bing_market: Optional[str] = Query(None, description="market/locale code"),
-    limit: Optional[int] = Query(
-        None,
-        gt=0,
-        le=MAX_RESULTS,
-        description=(
-            "Maximum number of results to return. If greater than a single page, the service"
-            " will fetch additional DuckDuckGo Lite pages up to the configured maximum."
-        ),
-    ),
 ):
-    params = {"q": q}
-    if s is not None:
-        params["s"] = str(max(s, 0))
-    if o:
-        params["o"] = o
-    if api:
-        params["api"] = api
-    params["kl"] = kl or DEFAULT_KL
-    if bing_market:
-        params["bing_market"] = bing_market

-    headers = {
-        "User-Agent": (
-            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-            "(KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
-        )
-    }
-
-    try:
-        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
-            results = await _collect_results(client, params, headers, limit)
-    except httpx.HTTPError as exc:
-        raise HTTPException(status_code=502, detail="Upstream search failed") from exc

-    return {
-        "query": q,
-        "kl": params.get("kl"),
-        "bing_market": params.get("bing_market"),
-        "count": len(results),
-        "results": results,
-    }
+# main.py
+from fastapi import FastAPI, Query
+from typing import List, Optional
+from pydantic import BaseModel
+import uvicorn
+
+# Paste the entire BingSearch library code here.
+# (Omitted for brevity; without it, the BingSearch name below is undefined.)
+
+app = FastAPI(title="BingSearch API", description="API for Bing search functionalities", version="1.0")
+
+bing = BingSearch(
+    timeout=10,
+    proxies=None,
+    verify=True,
+    lang="en-US",
+    sleep_interval=0.0,
+    impersonate="chrome110"
 )

+class SearchResult(BaseModel):
+    url: str
+    title: str
+    description: str
+
+class ImageResult(BaseModel):
+    title: str
+    image: str
+    thumbnail: str
+    url: str
+    source: str
+
+class NewsResult(BaseModel):
+    title: str
+    url: str
+    description: str
+    source: str
+
+@app.get("/search/text", response_model=List[SearchResult])
+def search_text(
+    keywords: str = Query(..., description="Search keywords"),
+    region: Optional[str] = Query(None, description="Region for search"),
+    safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
+    max_results: int = Query(10, description="Maximum number of results"),
+    unique: bool = Query(True, description="Exclude duplicate URLs")
 ):
+    results = bing.text(keywords, region, safesearch, max_results, unique)
+    return [SearchResult(url=r.url, title=r.title, description=r.description) for r in results]

+@app.get("/search/suggestions", response_model=List[str])
+def get_suggestions(
+    query: str = Query(..., description="Query for suggestions"),
+    region: Optional[str] = Query(None, description="Region for suggestions")
+):
+    return bing.suggestions(query, region)
+
+@app.get("/search/images", response_model=List[ImageResult])
+def search_images(
+    keywords: str = Query(..., description="Search keywords"),
+    region: Optional[str] = Query(None, description="Region for search"),
+    safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
+    max_results: int = Query(10, description="Maximum number of results")
+):
+    results = bing.images(keywords, region, safesearch, max_results)
+    return [ImageResult(title=r.title, image=r.image, thumbnail=r.thumbnail, url=r.url, source=r.source) for r in results]
+
+@app.get("/search/news", response_model=List[NewsResult])
+def search_news(
+    keywords: str = Query(..., description="Search keywords"),
+    region: Optional[str] = Query(None, description="Region for search"),
+    safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
+    max_results: int = Query(10, description="Maximum number of results")
+):
+    results = bing.news(keywords, region, safesearch, max_results)
+    return [NewsResult(title=r.title, url=r.url, description=r.description, source=r.source) for r in results]
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
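As committed, the new main.py references a BingSearch class that exists only as a "paste here" comment, so importing the module raises NameError until that code is added. The stub below is a hypothetical stand-in inferred solely from the call sites in the new file (the constructor keywords and the text/suggestions/images/news calls); it is not the real library's API, and every method raises until a real implementation replaces it.

# Hypothetical BingSearch stand-in, inferred from the call sites in main.py.
# The class name and signatures mirror main.py; the result types and all
# internals are assumptions, not the real library's API.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class BingTextResult:
    url: str
    title: str
    description: str


@dataclass
class BingImageResult:
    title: str
    image: str
    thumbnail: str
    url: str
    source: str


@dataclass
class BingNewsResult:
    title: str
    url: str
    description: str
    source: str


class BingSearch:
    def __init__(self, timeout: float = 10, proxies: Optional[dict] = None,
                 verify: bool = True, lang: str = "en-US",
                 sleep_interval: float = 0.0, impersonate: str = "chrome110"):
        # Keyword arguments mirror the constructor call in main.py.
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self.impersonate = impersonate

    def text(self, keywords: str, region: Optional[str] = None,
             safesearch: str = "moderate", max_results: int = 10,
             unique: bool = True) -> List[BingTextResult]:
        raise NotImplementedError("paste the real BingSearch implementation")

    def suggestions(self, query: str, region: Optional[str] = None) -> List[str]:
        raise NotImplementedError("paste the real BingSearch implementation")

    def images(self, keywords: str, region: Optional[str] = None,
               safesearch: str = "moderate",
               max_results: int = 10) -> List[BingImageResult]:
        raise NotImplementedError("paste the real BingSearch implementation")

    def news(self, keywords: str, region: Optional[str] = None,
             safesearch: str = "moderate",
             max_results: int = 10) -> List[BingNewsResult]:
        raise NotImplementedError("paste the real BingSearch implementation")

With a real implementation pasted in, the service starts via `python main.py` (or `uvicorn main:app`) and can be exercised with requests such as GET /search/text?keywords=python&max_results=5.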