# Brave / main.py — Bing Search API (FastAPI wrapper around scraped Bing results)
# Author: Husnain Rasheed (commit 2bdf25f, "Update main.py")
import time
import json
import base64
from typing import List, Optional, Dict, Any
from urllib.parse import urlencode, urlparse, parse_qs
from concurrent.futures import ThreadPoolExecutor
import uvicorn
from fastapi import FastAPI, HTTPException, Query, Request, Response
from pydantic import BaseModel, Field
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from curl_cffi.requests import AsyncSession
from bs4 import BeautifulSoup
# --- Pydantic Models for API Responses ---
class BingSearchResult(BaseModel):
    """A single organic (web page) result returned by a Bing text search."""

    url: str = Field(..., description="The URL of the search result.")
    title: str = Field(..., description="The title of the search result.")
    description: str = Field(..., description="A brief description of the search result.")
    # default_factory is pydantic's documented-safe form for mutable defaults,
    # guaranteeing each instance gets its own fresh dict.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata for the result.")
class BingImageResult(BaseModel):
    """A single result from a Bing image search, built from the JSON metadata
    Bing embeds on each image anchor (the ``m`` attribute of ``a.iusc``)."""

    title: str = Field(..., description="The title of the image.")
    image_url: str = Field(..., description="The direct URL to the full-size image.")
    thumbnail_url: str = Field(..., description="The URL to the thumbnail of the image.")
    page_url: str = Field(..., description="The URL of the page where the image was found.")
    source: str = Field(..., description="The source or domain of the image.")
class BingNewsResult(BaseModel):
    """A single article returned by a Bing news search.

    ``source`` defaults to ``""`` because not every news card in Bing's
    markup carries a source element (see the news() scraper).
    """

    title: str = Field(..., description="The title of the news article.")
    url: str = Field(..., description="The URL to the news article.")
    description: str = Field(..., description="A snippet from the news article.")
    source: str = Field("", description="The source of the news article.")
# --- Custom Middleware for Response Headers ---
class CustomHeaderMiddleware(BaseHTTPMiddleware):
    """Middleware that stamps every response with timing and branding headers."""

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        """Time the downstream handler and attach diagnostic headers.

        Adds ``X-Response-Time`` (wall time spent producing the response)
        and a static ``X-Powered-By`` header.
        """
        # perf_counter() is monotonic: unlike time.time(), the measured
        # duration can never be negative if the system clock is adjusted
        # while the request is in flight.
        start_time = time.perf_counter()
        response = await call_next(request)
        process_time = time.perf_counter() - start_time
        response.headers["X-Response-Time"] = f"{process_time:.4f}s"
        response.headers["X-Powered-By"] = "NiansuhAI"
        return response
# --- Bing Search Service ---
class BingSearch:
    """Asynchronous Bing search client (text, suggestions, images, news).

    Results are scraped from Bing's public HTML pages (plus the osjson
    autosuggest endpoint), so the CSS selectors used here are coupled to
    Bing's current markup and may need updating when Bing changes it.
    """

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en-US",
        impersonate: str = "chrome110"
    ):
        """Create a client with its own curl_cffi session.

        Args:
            timeout: Per-request timeout in seconds.
            proxies: Optional proxy mapping passed to the HTTP session.
            verify: Whether to verify TLS certificates.
            lang: Language code sent as ``setlang`` in image searches.
            impersonate: Browser fingerprint profile for curl_cffi.
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self._base_url = "https://www.bing.com"
        self.session = AsyncSession(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        # Explicit UA on top of the impersonation profile.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        })

    async def _fetch_html(self, url: str) -> str:
        """GET *url* and return the body text, mapping any failure to HTTP 500."""
        try:
            resp = await self.session.get(url)
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to fetch Bing search results: {e}")

    def _get_url(self, tag) -> str:
        """Resolve a result anchor's href, decoding Bing's redirect wrapper.

        Bing wraps outbound result links as ``.../ck/a?...&u=<payload>`` where
        the first two characters of ``u`` are a version prefix and the rest is
        the base64url-encoded target URL. Falls back to the raw href when the
        payload is absent or cannot be decoded.
        """
        url = tag.get('href', '')
        try:
            parsed_url = urlparse(url)
            query_params = parse_qs(parsed_url.query)
            if "u" in query_params:
                # Drop the 2-char prefix, pad to a multiple of 4, and decode.
                encoded_url = query_params["u"][0][2:]
                decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
                return decoded_bytes.decode('utf-8')
        except Exception:
            return url
        return url

    async def text(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List[BingSearchResult]:
        """Run a Bing web search, following pagination until *max_results*
        unique results are collected or no next page exists.

        NOTE(review): *safesearch* is accepted for interface parity with
        images()/news() but is not currently applied to the request URL.

        Raises:
            ValueError: If *keywords* is empty.
            HTTPException: 500 when a page fetch fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty.")
        fetched_results = []
        fetched_links = set()
        # urlencode the query so spaces, '&' or '#' in keywords cannot
        # corrupt the request URL.
        url = f"{self._base_url}/search?{urlencode({'q': keywords, 'form': 'QBLH'})}"
        if region:
            url += f"&setmkt={region}"
        while url and len(fetched_results) < max_results:
            html = await self._fetch_html(url)
            soup = BeautifulSoup(html, "html.parser")
            for result in soup.select('ol#b_results > li.b_algo'):
                title_tag = result.find('h2')
                if not title_tag:
                    continue
                link_tag = title_tag.find('a')
                if not link_tag or not link_tag.has_attr('href'):
                    continue
                url_val = self._get_url(link_tag)
                title = title_tag.get_text(strip=True)
                desc_container = result.find('div', class_='b_caption')
                description = desc_container.get_text(strip=True) if desc_container else ''
                # De-duplicate by resolved URL across pages.
                if url_val and title and url_val not in fetched_links:
                    fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
                    fetched_links.add(url_val)
                    if len(fetched_results) >= max_results:
                        break
            if len(fetched_results) >= max_results:
                break
            next_page_tag = soup.select_one('a.sb_pagN')
            url = self._base_url + next_page_tag['href'] if next_page_tag and next_page_tag.get('href') else None
        return fetched_results[:max_results]

    async def suggestions(self, query: str, region: Optional[str] = None) -> List[str]:
        """Fetch autosuggest completions from Bing's osjson endpoint.

        The endpoint returns ``[query, [suggestions...]]``; only the
        suggestion list is returned (empty on unexpected payload shape).

        Raises:
            ValueError: If *query* is empty.
            HTTPException: 500 when the request fails.
        """
        if not query:
            raise ValueError("Search query cannot be empty.")
        params = {"query": query, "mkt": region if region else "en-US"}
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = await self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
            return data[1] if isinstance(data, list) and len(data) > 1 else []
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to fetch suggestions: {e}")

    async def images(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List[BingImageResult]:
        """Run a Bing image search and parse the per-image JSON metadata.

        Each ``a.iusc`` anchor carries a JSON blob in its ``m`` attribute;
        entries without a full-size URL (``murl``) are skipped.

        Raises:
            ValueError: If *keywords* is empty.
            HTTPException: 500 when the page fetch fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty.")
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        params = {
            "q": keywords, "count": max_results, "setlang": self.lang,
            "safeSearch": safe_map.get(safesearch.lower(), "Moderate"),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/images/search?{urlencode(params)}"
        html = await self._fetch_html(url)
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select("a.iusc"):
            try:
                meta = json.loads(item.get("m", '{}'))
                if meta.get("murl"):
                    results.append(
                        BingImageResult(
                            title=meta.get("t", ""),
                            image_url=meta.get("murl", ""),
                            thumbnail_url=meta.get("turl", ""),
                            page_url=meta.get("purl", ""),
                            source=meta.get("surl", "")
                        )
                    )
                    if len(results) >= max_results:
                        break
            except (json.JSONDecodeError, KeyError):
                # Malformed metadata on one card must not abort the whole scrape.
                continue
        return results[:max_results]

    async def news(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List[BingNewsResult]:
        """Run a Bing news search and scrape the ``div.news-card`` elements.

        Cards without a title anchor are skipped; snippet and source are
        optional and default to "". The source text is split on '·' because
        Bing appends the article age after that separator.

        Raises:
            ValueError: If *keywords* is empty.
            HTTPException: 500 when the page fetch fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty.")
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        params = {
            "q": keywords, "form": "QBNH",
            "safeSearch": safe_map.get(safesearch.lower(), "Moderate"),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/news/search?{urlencode(params)}"
        html = await self._fetch_html(url)
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select("div.news-card"):
            a_tag = item.find("a", class_="title")
            if not a_tag:
                continue
            results.append(
                BingNewsResult(
                    title=a_tag.get_text(strip=True),
                    url=a_tag.get('href', ''),
                    description=item.find("div", class_="snippet").get_text(strip=True) if item.find("div", class_="snippet") else "",
                    source=item.find("div", class_="source").get_text(strip=True).split('·')[0].strip() if item.find("div", class_="source") else "",
                )
            )
            if len(results) >= max_results:
                break
        return results[:max_results]
# --- FastAPI Application Setup ---
# Application object exposed to uvicorn (see the __main__ guard below).
app = FastAPI(
    title="Bing Search API",
    description="A FastAPI wrapper for the BingSearch library with advanced features, powered by NiansuhAI.",
    version="2.0.0",
)
# Stamps X-Response-Time / X-Powered-By headers onto every response.
app.add_middleware(CustomHeaderMiddleware)
# One shared client (and underlying HTTP session) reused by all endpoints.
bing_search_service = BingSearch()
# --- API Endpoints ---
@app.get("/search", response_model=List[BingSearchResult], summary="Perform a text search")
async def text_search(
    query: str = Query(..., description="The search keywords."),
    region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of results to return."),
):
    # Delegate to the shared search service; input-validation failures
    # (empty keywords) become HTTP 400 rather than generic 500s.
    # Comments (not a docstring) keep the OpenAPI description unchanged.
    try:
        results = await bing_search_service.text(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return results
@app.get("/suggestions", response_model=List[str], summary="Get search suggestions")
async def get_suggestions(
    query: str = Query(..., description="The search query for which to fetch suggestions."),
    region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
):
    # Thin proxy over the autosuggest endpoint; an empty query is a caller
    # error and is reported as HTTP 400.
    try:
        completions = await bing_search_service.suggestions(query=query, region=region)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return completions
@app.get("/images", response_model=List[BingImageResult], summary="Perform an image search")
async def image_search(
    query: str = Query(..., description="The search keywords for images."),
    region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of image results to return."),
):
    # Forward to the shared client's image scraper; empty keywords surface
    # as HTTP 400 instead of a server error.
    try:
        images = await bing_search_service.images(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return images
@app.get("/news", response_model=List[BingNewsResult], summary="Perform a news search")
async def news_search(
    query: str = Query(..., description="The search keywords for news."),
    region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of news results to return."),
):
    # Forward to the shared client's news scraper; empty keywords surface
    # as HTTP 400 instead of a server error.
    try:
        articles = await bing_search_service.news(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return articles
if __name__ == "__main__":
    # Development entry point: serve the app on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)