# Brave / main.py
# (Hugging Face file-viewer chrome, commented out so the module parses:
#  rkihacker's picture / Update main.py / 94fa239 verified / raw /
#  history blame / 15.1 kB)
from fastapi import FastAPI, HTTPException, Query
from typing import List, Optional
from pydantic import BaseModel
from time import sleep
from curl_cffi.requests import Session
from urllib.parse import urlencode, unquote, urlparse, parse_qs
import base64
from typing import Dict, Any
from concurrent.futures import ThreadPoolExecutor
from webscout.litagent import LitAgent
from bs4 import BeautifulSoup
import json
# FastAPI application instance; this metadata feeds the auto-generated OpenAPI docs.
app = FastAPI(
title="Bing Search API",
description="A FastAPI wrapper for the BingSearch library with advanced features.",
version="1.0.0",
)
# --- BingSearch Library Code ---
# The provided BingSearch code is integrated here directly.
class BingSearchResult(BaseModel):
    """One organic web-search result scraped from a Bing results page."""
    url: str  # final destination URL (Bing redirect wrapper already decoded)
    title: str  # heading text of the result
    description: str  # snippet/caption text; '' when none was found
    metadata: Dict[str, Any] = {}  # reserved for extra fields; not populated by BingSearch.text
class BingImageResult(BaseModel):
    """One image-search result parsed from Bing's image metadata JSON."""
    title: str  # image title ("t" field of the metadata)
    image: str  # full-size image URL ("murl")
    thumbnail: str  # thumbnail URL ("turl")
    url: str  # page the image was found on ("purl")
    source: str  # source site URL ("surl")
class BingNewsResult(BaseModel):
    """One news-search result scraped from Bing News HTML."""
    title: str  # headline text
    url: str  # article URL
    description: str  # snippet text; '' when no snippet element was found
    source: str = ""  # publisher name; '' when no source element was found
class BingSearch:
    """Bing search scraper with configurable parameters and advanced features.

    Provides web search (``text``), image search (``images``), news search
    (``news``) and query autocompletion (``suggestions``) by scraping Bing's
    public endpoints through a browser-impersonating ``curl_cffi`` session.
    """
    # Shared thread pool (kept for API compatibility; not used internally).
    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en-US",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110"
    ):
        """Create a search client.

        Args:
            timeout: Per-request timeout in seconds.
            proxies: Optional mapping of scheme -> proxy URL.
            verify: Whether to verify TLS certificates.
            lang: Language code sent as ``setlang`` on image searches.
            sleep_interval: Delay (seconds) between result-page fetches in ``text``.
            impersonate: curl_cffi browser profile to impersonate.
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self._base_url = "https://www.bing.com"
        self.session = Session(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        # A realistic User-Agent reduces the chance of bot detection.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        })

    def _selectors(self, element):
        """Return the CSS selector used for the given page element key."""
        selectors = {
            'links': 'ol#b_results > li',  # generic selector for any result list item
            'next': 'a.sb_pagN'            # "Next" page button
        }
        return selectors[element]

    def _first_page(self, query):
        """Build the URL of the first web-search results page.

        FIX: the query is now URL-encoded via ``urlencode`` so that spaces
        and special characters (&, #, +, ...) no longer produce a malformed
        URL — the previous version interpolated the raw query directly.
        """
        params = urlencode({'q': query, 'search': '', 'form': 'QBLH'})
        return {'url': f'{self._base_url}/search?{params}', 'data': None}

    def _next_page(self, soup):
        """Extract the absolute URL of the next results page, or None."""
        next_page_tag = soup.select_one(self._selectors('next'))
        url = None
        if next_page_tag and next_page_tag.get('href'):
            url = self._base_url + next_page_tag['href']
        return {'url': url, 'data': None}

    def _get_url(self, tag):
        """Resolve a result link, decoding Bing's base64 redirect wrapper.

        Bing wraps outbound links as ``/ck/a?...&u=a1<base64>``; the first
        two characters of ``u`` are a version prefix stripped before
        decoding.  Falls back to returning the raw href on any failure.
        """
        url = tag.get('href', '')
        resp = url
        try:
            parsed_url = urlparse(url)
            query_params = parse_qs(parsed_url.query)
            if "u" in query_params:
                encoded_url = query_params["u"][0][2:]  # strip the "a1" prefix
                try:
                    # Extra '=' padding is ignored by the decoder and guards
                    # against Bing omitting padding characters.
                    decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
                except base64.binascii.Error as e:
                    print(f"Error decoding Base64 string: {e}")
                    return url
                resp = decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Error decoding Base64 string: {e}")
        return resp

    def text(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        unique: bool = True
    ) -> List[BingSearchResult]:
        """Scrape Bing web-search results, following pagination as needed.

        Args:
            keywords: Search query; must be non-empty.
            region: Optional market code (e.g. "en-US"), sent as ``mkt``.
            safesearch: Accepted for interface symmetry; Bing controls web
                safe-search via cookies, so it is not applied here.
            max_results: Maximum number of results to return.
            unique: When True, skip results whose URL was already collected.

        Returns:
            Up to ``max_results`` BingSearchResult objects.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If fetching a results page fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        fetched_results = []
        fetched_links = set()

        def fetch_page(url):
            # Fetch one results page, normalizing transport errors.
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
                return resp.text
            except Exception as e:
                raise Exception(f"Bing search failed: {str(e)}")

        current_url = self._first_page(keywords)['url']
        # FIX: honor the previously-ignored region parameter via "mkt".
        if region:
            current_url += '&' + urlencode({'mkt': region})
        while current_url and len(fetched_results) < max_results:
            html = fetch_page(current_url)
            soup = BeautifulSoup(html, "html.parser")
            # Use the generic selector for result blocks.
            result_blocks = soup.select(self._selectors('links'))
            for result in result_blocks:
                # Title and link are normally inside an <h2> tag.
                title_tag = result.find('h2')
                if not title_tag:
                    continue
                link_tag = title_tag.find('a')
                if not link_tag or not link_tag.has_attr('href'):
                    continue
                url_val = self._get_url(link_tag)
                title = title_tag.get_text(strip=True)
                # Description usually lives in div.b_caption, often as a <p>.
                desc_container = result.find('div', class_='b_caption')
                description = ''
                if desc_container:
                    desc_p = desc_container.find('p')
                    if desc_p:
                        description = desc_p.get_text(strip=True)
                    else:
                        description = desc_container.get_text(strip=True)
                # Fallback: any stray <p> inside the result block.
                if not description:
                    p_tag = result.find('p')
                    if p_tag:
                        description = p_tag.get_text(strip=True)
                if url_val and title:
                    if unique and url_val in fetched_links:
                        continue
                    fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
                    fetched_links.add(url_val)
                    if len(fetched_results) >= max_results:
                        break
            if len(fetched_results) >= max_results:
                break
            # Follow pagination to the next results page, if any.
            next_page_info = self._next_page(soup)
            current_url = next_page_info['url']
            if current_url:
                sleep(self.sleep_interval)  # be polite between page fetches

        return fetched_results[:max_results]

    def suggestions(self, query: str, region: str = None) -> List[str]:
        """Fetch autocomplete suggestions from Bing's OSJSON endpoint.

        Raises:
            ValueError: If ``query`` is empty.
            Exception: If the request fails (HTTP status included when known).
        """
        if not query:
            raise ValueError("Search query cannot be empty")
        params = {
            "query": query,
            "mkt": region if region else "en-US"
        }
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
            # OSJSON payload shape: [query, [suggestion, ...], ...]
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing suggestions failed: {str(e)}")

    def images(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List[BingImageResult]:
        """Scrape Bing image-search results.

        Image metadata is embedded as JSON in the ``m`` attribute of each
        ``a.iusc`` anchor; malformed entries are skipped silently.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the search request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        safe_map = {
            "on": "Strict",
            "moderate": "Moderate",
            "off": "Off"
        }
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {
            "q": keywords,
            "count": max_results,
            "setlang": self.lang,
            "safeSearch": safe,
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/images/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            html = resp.text
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing image search failed: {str(e)}")
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select("a.iusc"):
            try:
                m = item.get("m")
                meta = json.loads(m) if m else {}
                image_url = meta.get("murl", "")
                thumb_url = meta.get("turl", "")
                title = meta.get("t", "")
                page_url = meta.get("purl", "")
                source = meta.get("surl", "")
                if image_url:
                    results.append(BingImageResult(title=title, image=image_url, thumbnail=thumb_url, url=page_url, source=source))
                    if len(results) >= max_results:
                        break
            except Exception:
                # Skip entries with missing/invalid metadata JSON.
                continue
        return results[:max_results]

    def news(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List['BingNewsResult']:
        """Scrape Bing news-search results.

        Tries several card selectors to survive Bing layout variations, then
        falls back to plain ``a.title`` anchors when no cards matched.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the search request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        safe_map = {
            "on": "Strict",
            "moderate": "Moderate",
            "off": "Off"
        }
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {
            "q": keywords,
            "form": "QBNH",
            "safeSearch": safe,
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/news/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing news search failed: {str(e)}")
        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
            a_tag = item.find("a")
            title = a_tag.get_text(strip=True) if a_tag else ''
            url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
            desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
            description = desc_tag.get_text(strip=True) if desc_tag else ''
            source_tag = item.find("div", class_="source")
            source = source_tag.get_text(strip=True) if source_tag else ''
            if url_val and title:
                results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
                if len(results) >= max_results:
                    break
        # Fallback layout: bare title anchors with no snippet/source info.
        if not results:
            for item in soup.select("a.title"):
                title = item.get_text(strip=True)
                url_val = item['href'] if item.has_attr('href') else ''
                description = ''
                source = ''
                if url_val and title:
                    results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
                    if len(results) >= max_results:
                        break
        return results[:max_results]
# Module-level singleton client shared by all request handlers below.
bing = BingSearch()
@app.get("/search", response_model=List[BingSearchResult])
async def text_search(
    query: str = Query(..., description="The search keywords."),
    region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of results to return."),
):
    """
    Perform a text search on Bing.
    """
    # Delegate to the shared client; surface any failure as an HTTP 500.
    try:
        return bing.text(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/suggestions", response_model=List[str])
async def get_suggestions(
    query: str = Query(..., description="The search query for which to fetch suggestions."),
    region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
):
    """
    Fetches search suggestions for a given query.
    """
    # Delegate to the shared client; surface any failure as an HTTP 500.
    try:
        return bing.suggestions(query=query, region=region)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/images", response_model=List[BingImageResult])
async def image_search(
    query: str = Query(..., description="The search keywords for images."),
    region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of image results to return."),
):
    """
    Perform an image search on Bing.
    """
    # Delegate to the shared client; surface any failure as an HTTP 500.
    try:
        return bing.images(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/news", response_model=List[BingNewsResult])
async def news_search(
    query: str = Query(..., description="The search keywords for news."),
    region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of news results to return."),
):
    """
    Perform a news search on Bing.
    """
    # Delegate to the shared client; surface any failure as an HTTP 500.
    try:
        return bing.news(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn
    # Development entry point: serve the app on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)