Husnain Rasheed committed on
Commit
8e70b5f
·
verified ·
1 Parent(s): 2b7ed67

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +359 -51
main.py CHANGED
@@ -1,78 +1,386 @@
1
- # main.py
2
- from fastapi import FastAPI, Query
3
  from typing import List, Optional
4
  from pydantic import BaseModel
5
- import uvicorn
 
 
 
 
 
 
 
 
6
 
7
- # Paste the entire BingSearch library code here
8
- # (Omitted for brevity in this response, but include the full code from the query)
9
-
10
- app = FastAPI(title="BingSearch API", description="API for Bing search functionalities", version="1.0")
11
-
12
- bing = BingSearch(
13
- timeout=10,
14
- proxies=None,
15
- verify=True,
16
- lang="en-US",
17
- sleep_interval=0.0,
18
- impersonate="chrome110"
19
  )
20
 
21
- class SearchResult(BaseModel):
 
 
 
22
  url: str
23
  title: str
24
  description: str
 
25
 
26
- class ImageResult(BaseModel):
27
  title: str
28
  image: str
29
  thumbnail: str
30
  url: str
31
  source: str
32
 
33
- class NewsResult(BaseModel):
34
  title: str
35
  url: str
36
  description: str
37
- source: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- @app.get("/search/text", response_model=List[SearchResult])
40
- def search_text(
41
- keywords: str = Query(..., description="Search keywords"),
42
- region: Optional[str] = Query(None, description="Region for search"),
43
- safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
44
- max_results: int = Query(10, description="Maximum number of results"),
45
- unique: bool = Query(True, description="Exclude duplicate URLs")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  ):
47
- results = bing.text(keywords, region, safesearch, max_results, unique)
48
- return [SearchResult(url=r.url, title=r.title, description=r.description) for r in results]
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- @app.get("/search/suggestions", response_model=List[str])
51
- def get_suggestions(
52
- query: str = Query(..., description="Query for suggestions"),
53
- region: Optional[str] = Query(None, description="Region for suggestions")
54
  ):
55
- return bing.suggestions(query, region)
56
-
57
- @app.get("/search/images", response_model=List[ImageResult])
58
- def search_images(
59
- keywords: str = Query(..., description="Search keywords"),
60
- region: Optional[str] = Query(None, description="Region for search"),
61
- safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
62
- max_results: int = Query(10, description="Maximum number of results")
 
 
 
 
 
 
 
63
  ):
64
- results = bing.images(keywords, region, safesearch, max_results)
65
- return [ImageResult(title=r.title, image=r.image, thumbnail=r.thumbnail, url=r.url, source=r.source) for r in results]
66
-
67
- @app.get("/search/news", response_model=List[NewsResult])
68
- def search_news(
69
- keywords: str = Query(..., description="Search keywords"),
70
- region: Optional[str] = Query(None, description="Region for search"),
71
- safesearch: str = Query("moderate", description="Safe search level: on, moderate, off"),
72
- max_results: int = Query(10, description="Maximum number of results")
 
 
 
 
 
 
 
 
 
 
 
73
  ):
74
- results = bing.news(keywords, region, safesearch, max_results)
75
- return [NewsResult(title=r.title, url=r.url, description=r.description, source=r.source) for r in results]
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  if __name__ == "__main__":
 
78
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
+ from fastapi import FastAPI, HTTPException, Query
 
2
  from typing import List, Optional
3
  from pydantic import BaseModel
4
+ from time import sleep
5
+ from curl_cffi.requests import Session
6
+ from urllib.parse import urlencode, unquote, urlparse, parse_qs
7
+ import base64
8
+ from typing import Dict, Any
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from webscout.litagent import LitAgent
11
+ from bs4 import BeautifulSoup
12
+ import json
13
 
14
# Application object: FastAPI picks up title/description/version for the
# auto-generated OpenAPI docs at /docs and /redoc.
app = FastAPI(
    title="Bing Search API",
    description="A FastAPI wrapper for the BingSearch library with advanced features.",
    version="1.0.0",
)
19
 
20
+ # --- BingSearch Library Code ---
21
+ # The provided BingSearch code is integrated here directly.
22
+
23
class BingSearchResult(BaseModel):
    """One organic web-search hit returned by ``BingSearch.text``."""

    url: str
    title: str
    description: str
    # Optional extra per-result data. Pydantic deep-copies mutable defaults
    # per instance, so the shared-{} pitfall of plain Python does not apply.
    metadata: Dict[str, Any] = {}
28
 
29
class BingImageResult(BaseModel):
    """One image hit returned by ``BingSearch.images``."""

    title: str
    # Full-size image URL and its thumbnail.
    image: str
    thumbnail: str
    # Page the image was found on, and the hosting site.
    url: str
    source: str
35
 
36
class BingNewsResult(BaseModel):
    """One news article returned by ``BingSearch.news``."""

    title: str
    url: str
    description: str
    # Publisher name; empty when the source element is absent from the card.
    source: str = ""
41
+
42
class BingSearch:
    """Bing search scraper with text, suggestion, image and news search.

    Fetches Bing result pages through a ``curl_cffi`` session (with browser
    impersonation and a generated header fingerprint) and parses them with
    BeautifulSoup. All public methods raise ``ValueError`` on empty input and
    wrap transport failures in a plain ``Exception`` with a descriptive
    message.
    """

    # Shared pool used to fetch result pages concurrently across instances.
    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en-US",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110"
    ):
        """Configure the HTTP session.

        Args:
            timeout: Per-request timeout in seconds.
            proxies: Optional proxy mapping passed to the session.
            verify: Whether to verify TLS certificates.
            lang: Language code sent as ``setlang`` on image searches.
            sleep_interval: Delay between successive page fetches.
            impersonate: curl_cffi browser-impersonation profile.
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self._base_url = "https://www.bing.com"
        self.session = Session(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        # Randomized, realistic browser headers to reduce bot detection.
        self.session.headers.update(LitAgent().generate_fingerprint())

    def _selectors(self, element):
        """Return the CSS selector used for *element* on a Bing results page."""
        selectors = {
            'url': 'h2 a',
            'title': 'h2',
            'text': 'p',
            'links': 'ol#b_results > li.b_algo',
            'next': 'div#b_content nav[role="navigation"] a.sb_pagN'
        }
        return selectors[element]

    def _first_page(self, query, safe=None, mkt=None):
        """Build the first results-page URL for *query*.

        BUGFIX: the query is now URL-encoded via ``urlencode`` — the previous
        raw f-string interpolation broke queries containing '&', '#', '+' or
        spaces. ``safe``/``mkt`` are optional Bing ``safeSearch``/market
        values, added so callers can actually apply them (defaults keep the
        old URL shape).
        """
        params = {'q': query, 'search': '', 'form': 'QBLH'}
        if safe:
            params['safeSearch'] = safe
        if mkt:
            params['mkt'] = mkt
        return {'url': f'{self._base_url}/search?{urlencode(params)}', 'data': None}

    def _next_page(self, soup):
        """Extract the next-page URL from a parsed results page (or None)."""
        selector = self._selectors('next')
        next_page_tag = soup.select_one(selector)
        url = None
        if next_page_tag and next_page_tag.get('href'):
            url = self._base_url + next_page_tag['href']
        return {'url': url, 'data': None}

    def _get_url(self, tag):
        """Resolve the real target URL from a result anchor tag.

        Bing wraps outbound links in a redirect whose ``u`` query parameter
        holds the destination, base64-encoded after a 2-character prefix
        (presumably a version marker — the ``[2:]`` slice strips it). Falls
        back to the raw href on any decoding problem.
        """
        url = tag.get('href', '')
        resp = url
        try:
            parsed_url = urlparse(url)
            query_params = parse_qs(parsed_url.query)
            if "u" in query_params:
                encoded_url = query_params["u"][0][2:]
                try:
                    # '===' padding makes the length a multiple of 4; extra
                    # padding is ignored by urlsafe_b64decode.
                    decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
                except base64.binascii.Error as e:
                    print(f"Error decoding Base64 string: {e}")
                    return url
                resp = decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Error decoding Base64 string: {e}")
        return resp

    def text(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        unique: bool = True
    ) -> List[BingSearchResult]:
        """Run a web search and return up to *max_results* parsed hits.

        Args:
            keywords: Search query (must be non-empty).
            region: Optional Bing market code (sent as ``mkt``).
            safesearch: 'on', 'moderate' or 'off' (unknown values -> Moderate).
            max_results: Cap on the number of results returned.
            unique: Skip results whose URL was already collected.

        Raises:
            ValueError: If *keywords* is empty.
            Exception: On any HTTP failure while fetching pages.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        safe_map = {
            "on": "Strict",
            "moderate": "Moderate",
            "off": "Off"
        }
        safe = safe_map.get(safesearch.lower(), "Moderate")
        fetched_results: List[BingSearchResult] = []
        fetched_links = set()

        def fetch_page(url):
            # Fetch one results page; re-raise failures with a clear message.
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
                return resp.text
            except Exception as e:
                if hasattr(e, 'response') and e.response is not None:
                    raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
                else:
                    raise Exception(f"Bing search failed: {str(e)}")

        # BUGFIX: `safe` and `region` were previously computed/accepted but
        # never sent to Bing; they are now part of the first-page request.
        urls_to_fetch = [self._first_page(keywords, safe=safe, mkt=region)['url']]
        while len(fetched_results) < max_results and urls_to_fetch:
            html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
            urls_to_fetch = []
            for html in html_pages:
                soup = BeautifulSoup(html, "html.parser")
                for result in soup.select(self._selectors('links')):
                    link_tag = result.select_one(self._selectors('url'))
                    if not link_tag:
                        continue
                    url_val = self._get_url(link_tag)
                    title_tag = result.select_one(self._selectors('title'))
                    title = title_tag.get_text(strip=True) if title_tag else ''
                    desc_tag = result.select_one(self._selectors('text'))
                    description = desc_tag.get_text(strip=True) if desc_tag else ''
                    if url_val and title:
                        if unique and url_val in fetched_links:
                            continue
                        fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
                        fetched_links.add(url_val)
                        if len(fetched_results) >= max_results:
                            break
                if len(fetched_results) >= max_results:
                    break
                next_page_info = self._next_page(soup)
                if next_page_info['url']:
                    urls_to_fetch.append(next_page_info['url'])
                sleep(self.sleep_interval)
            # BUGFIX: removed a duplicated `_next_page` lookup, an unused
            # `url` assignment and an extra sleep that ran after this loop.
        return fetched_results[:max_results]

    def suggestions(self, query: str, region: str = None) -> List[str]:
        """Return autocomplete suggestions for *query* from Bing's OSJSON API.

        Raises:
            ValueError: If *query* is empty.
            Exception: On any HTTP failure.
        """
        if not query:
            raise ValueError("Search query cannot be empty")
        params = {
            "query": query,
            "mkt": region if region else "en-US"
        }
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
            # OSJSON payload shape: [query, [suggestion, ...], ...]
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing suggestions failed: {str(e)}")

    def images(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List[BingImageResult]:
        """Run an image search and return up to *max_results* parsed hits.

        Parses the JSON metadata Bing embeds in each ``a.iusc`` tile's ``m``
        attribute; malformed tiles are skipped.

        Raises:
            ValueError: If *keywords* is empty.
            Exception: On any HTTP failure.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        safe_map = {
            "on": "Strict",
            "moderate": "Moderate",
            "off": "Off"
        }
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {
            "q": keywords,
            "count": max_results,
            "setlang": self.lang,
            "safeSearch": safe,
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/images/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            html = resp.text
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing image search failed: {str(e)}")
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select("a.iusc"):
            try:
                m = item.get("m")
                meta = json.loads(m) if m else {}
                image_url = meta.get("murl", "")
                thumb_url = meta.get("turl", "")
                title = meta.get("t", "")
                page_url = meta.get("purl", "")
                source = meta.get("surl", "")
                if image_url:
                    results.append(BingImageResult(title=title, image=image_url, thumbnail=thumb_url, url=page_url, source=source))
                    if len(results) >= max_results:
                        break
            except Exception:
                # Tile without valid JSON metadata — skip it.
                continue
        return results[:max_results]

    def news(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List['BingNewsResult']:
        """Run a news search and return up to *max_results* parsed articles.

        Tries several known news-card container selectors first, then falls
        back to bare ``a.title`` anchors when no cards matched (Bing's news
        markup varies by market/layout).

        Raises:
            ValueError: If *keywords* is empty.
            Exception: On any HTTP failure.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        safe_map = {
            "on": "Strict",
            "moderate": "Moderate",
            "off": "Off"
        }
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {
            "q": keywords,
            "form": "QBNH",
            "safeSearch": safe,
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/news/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
        except Exception as e:
            if hasattr(e, 'response') and e.response is not None:
                raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
            else:
                raise Exception(f"Bing news search failed: {str(e)}")
        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
            a_tag = item.find("a")
            title = a_tag.get_text(strip=True) if a_tag else ''
            url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
            desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
            description = desc_tag.get_text(strip=True) if desc_tag else ''
            source_tag = item.find("div", class_="source")
            source = source_tag.get_text(strip=True) if source_tag else ''
            if url_val and title:
                results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
                if len(results) >= max_results:
                    break
        if not results:
            # Fallback for layouts where results are plain title anchors.
            for item in soup.select("a.title"):
                title = item.get_text(strip=True)
                url_val = item['href'] if item.has_attr('href') else ''
                description = ''
                source = ''
                if url_val and title:
                    results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
                    if len(results) >= max_results:
                        break
        return results[:max_results]
303
+
304
+
305
# One shared scraper instance (and its HTTP session) reused by every endpoint.
bing = BingSearch()
306
+
307
@app.get("/search", response_model=List[BingSearchResult])
async def text_search(
    query: str = Query(..., description="The search keywords."),
    region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of results to return."),
):
    """
    Perform a text search on Bing.
    """
    try:
        return bing.text(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        # BUGFIX: invalid client input (e.g. empty query) is a 400, not a 500.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Upstream/scraping failure.
        raise HTTPException(status_code=500, detail=str(e))
327
 
328
@app.get("/suggestions", response_model=List[str])
async def get_suggestions(
    query: str = Query(..., description="The search query for which to fetch suggestions."),
    region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
):
    """
    Fetches search suggestions for a given query.
    """
    try:
        return bing.suggestions(query=query, region=region)
    except ValueError as e:
        # BUGFIX: invalid client input (e.g. empty query) is a 400, not a 500.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Upstream/scraping failure.
        raise HTTPException(status_code=500, detail=str(e))
341
+
342
@app.get("/images", response_model=List[BingImageResult])
async def image_search(
    query: str = Query(..., description="The search keywords for images."),
    region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of image results to return."),
):
    """
    Perform an image search on Bing.
    """
    try:
        return bing.images(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        # BUGFIX: invalid client input (e.g. empty query) is a 400, not a 500.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Upstream/scraping failure.
        raise HTTPException(status_code=500, detail=str(e))
362
+
363
@app.get("/news", response_model=List[BingNewsResult])
async def news_search(
    query: str = Query(..., description="The search keywords for news."),
    region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
    max_results: int = Query(10, description="Maximum number of news results to return."),
):
    """
    Perform a news search on Bing.
    """
    try:
        return bing.news(
            keywords=query,
            region=region,
            safesearch=safesearch,
            max_results=max_results,
        )
    except ValueError as e:
        # BUGFIX: invalid client input (e.g. empty query) is a 400, not a 500.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        # Upstream/scraping failure.
        raise HTTPException(status_code=500, detail=str(e))
383
 
384
if __name__ == "__main__":
    # Run a local development server when this module is executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)