Husnain Rasheed committed on
Commit
642dd91
·
verified ·
1 Parent(s): 94fa239

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +92 -101
main.py CHANGED
@@ -10,11 +10,12 @@ from concurrent.futures import ThreadPoolExecutor
10
  from webscout.litagent import LitAgent
11
  from bs4 import BeautifulSoup
12
  import json
 
13
 
14
  app = FastAPI(
15
- title="Bing Search API",
16
- description="A FastAPI wrapper for the BingSearch library with advanced features.",
17
- version="1.0.0",
18
  )
19
 
20
  # --- BingSearch Library Code ---
@@ -64,16 +65,14 @@ class BingSearch:
64
  timeout=self.timeout,
65
  impersonate=impersonate
66
  )
67
- # It's good practice to set a realistic User-Agent
68
  self.session.headers.update({
69
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
70
  })
71
 
72
- # FIX: Updated selectors to be more robust against Bing UI changes.
73
  def _selectors(self, element):
74
  selectors = {
75
- 'links': 'ol#b_results > li', # More generic selector for any list item in results
76
- 'next': 'a.sb_pagN' # Selector for the "Next" page button
77
  }
78
  return selectors[element]
79
 
@@ -91,6 +90,10 @@ class BingSearch:
91
 
92
  def _get_url(self, tag):
93
  url = tag.get('href', '')
 
 
 
 
94
  resp = url
95
  try:
96
  parsed_url = urlparse(url)
@@ -107,7 +110,6 @@ class BingSearch:
107
  print(f"Error decoding Base64 string: {e}")
108
  return resp
109
 
110
- # FIX: The entire text parsing logic is updated to handle modern Bing HTML structure.
111
  def text(
112
  self,
113
  keywords: str,
@@ -136,11 +138,9 @@ class BingSearch:
136
  html = fetch_page(current_url)
137
  soup = BeautifulSoup(html, "html.parser")
138
 
139
- # Use the more generic selector for result blocks
140
  result_blocks = soup.select(self._selectors('links'))
141
 
142
  for result in result_blocks:
143
- # Find the title and link, which are usually in an <h2> tag
144
  title_tag = result.find('h2')
145
  if not title_tag:
146
  continue
@@ -150,24 +150,19 @@ class BingSearch:
150
  continue
151
 
152
  url_val = self._get_url(link_tag)
153
- title = title_tag.get_text(strip=True)
 
154
 
155
- # Find the description, often in a div with class 'b_caption'
156
  desc_container = result.find('div', class_='b_caption')
157
  description = ''
158
  if desc_container:
159
- # Find the paragraph within the caption, or use the whole caption text
160
- desc_p = desc_container.find('p')
161
- if desc_p:
162
- description = desc_p.get_text(strip=True)
163
- else:
164
- description = desc_container.get_text(strip=True)
165
 
166
- # Fallback if no 'b_caption' is found
167
  if not description:
168
  p_tag = result.find('p')
169
  if p_tag:
170
- description = p_tag.get_text(strip=True)
171
 
172
  if url_val and title:
173
  if unique and url_val in fetched_links:
@@ -182,7 +177,6 @@ class BingSearch:
182
  if len(fetched_results) >= max_results:
183
  break
184
 
185
- # Find the next page URL
186
  next_page_info = self._next_page(soup)
187
  current_url = next_page_info['url']
188
  if current_url:
@@ -190,7 +184,6 @@ class BingSearch:
190
 
191
  return fetched_results[:max_results]
192
 
193
-
194
  def suggestions(self, query: str, region: str = None) -> List[str]:
195
  if not query:
196
  raise ValueError("Search query cannot be empty")
@@ -207,10 +200,7 @@ class BingSearch:
207
  return data[1]
208
  return []
209
  except Exception as e:
210
- if hasattr(e, 'response') and e.response is not None:
211
- raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
212
- else:
213
- raise Exception(f"Bing suggestions failed: {str(e)}")
214
 
215
  def images(
216
  self,
@@ -221,18 +211,9 @@ class BingSearch:
221
  ) -> List[BingImageResult]:
222
  if not keywords:
223
  raise ValueError("Search keywords cannot be empty")
224
- safe_map = {
225
- "on": "Strict",
226
- "moderate": "Moderate",
227
- "off": "Off"
228
- }
229
  safe = safe_map.get(safesearch.lower(), "Moderate")
230
- params = {
231
- "q": keywords,
232
- "count": max_results,
233
- "setlang": self.lang,
234
- "safeSearch": safe,
235
- }
236
  if region:
237
  params["mkt"] = region
238
  url = f"{self._base_url}/images/search?{urlencode(params)}"
@@ -241,10 +222,7 @@ class BingSearch:
241
  resp.raise_for_status()
242
  html = resp.text
243
  except Exception as e:
244
- if hasattr(e, 'response') and e.response is not None:
245
- raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
246
- else:
247
- raise Exception(f"Bing image search failed: {str(e)}")
248
  soup = BeautifulSoup(html, "html.parser")
249
  results = []
250
  for item in soup.select("a.iusc"):
@@ -273,17 +251,9 @@ class BingSearch:
273
  ) -> List['BingNewsResult']:
274
  if not keywords:
275
  raise ValueError("Search keywords cannot be empty")
276
- safe_map = {
277
- "on": "Strict",
278
- "moderate": "Moderate",
279
- "off": "Off"
280
- }
281
  safe = safe_map.get(safesearch.lower(), "Moderate")
282
- params = {
283
- "q": keywords,
284
- "form": "QBNH",
285
- "safeSearch": safe,
286
- }
287
  if region:
288
  params["mkt"] = region
289
  url = f"{self._base_url}/news/search?{urlencode(params)}"
@@ -291,27 +261,24 @@ class BingSearch:
291
  resp = self.session.get(url)
292
  resp.raise_for_status()
293
  except Exception as e:
294
- if hasattr(e, 'response') and e.response is not None:
295
- raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
296
- else:
297
- raise Exception(f"Bing news search failed: {str(e)}")
298
  soup = BeautifulSoup(resp.text, "html.parser")
299
  results = []
300
  for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
301
  a_tag = item.find("a")
302
- title = a_tag.get_text(strip=True) if a_tag else ''
303
  url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
304
  desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
305
- description = desc_tag.get_text(strip=True) if desc_tag else ''
306
  source_tag = item.find("div", class_="source")
307
- source = source_tag.get_text(strip=True) if source_tag else ''
308
  if url_val and title:
309
  results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
310
  if len(results) >= max_results:
311
  break
312
  if not results:
313
  for item in soup.select("a.title"):
314
- title = item.get_text(strip=True)
315
  url_val = item['href'] if item.has_attr('href') else ''
316
  description = ''
317
  source = ''
@@ -321,83 +288,107 @@ class BingSearch:
321
  break
322
  return results[:max_results]
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  bing = BingSearch()
 
326
 
327
- @app.get("/search", response_model=List[BingSearchResult])
328
- async def text_search(
329
  query: str = Query(..., description="The search keywords."),
330
  region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
331
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
332
  max_results: int = Query(10, description="Maximum number of results to return."),
333
  ):
334
- """
335
- Perform a text search on Bing.
336
- """
337
  try:
338
- results = bing.text(
339
- keywords=query,
340
- region=region,
341
- safesearch=safesearch,
342
- max_results=max_results,
343
- )
344
- return results
345
  except Exception as e:
346
  raise HTTPException(status_code=500, detail=str(e))
347
 
348
- @app.get("/suggestions", response_model=List[str])
349
- async def get_suggestions(
350
  query: str = Query(..., description="The search query for which to fetch suggestions."),
351
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
352
  ):
353
- """
354
- Fetches search suggestions for a given query.
355
- """
356
  try:
357
- suggestions = bing.suggestions(query=query, region=region)
358
- return suggestions
359
  except Exception as e:
360
  raise HTTPException(status_code=500, detail=str(e))
361
 
362
- @app.get("/images", response_model=List[BingImageResult])
363
- async def image_search(
364
  query: str = Query(..., description="The search keywords for images."),
365
  region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
366
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
367
  max_results: int = Query(10, description="Maximum number of image results to return."),
368
  ):
369
- """
370
- Perform an image search on Bing.
371
- """
372
  try:
373
- results = bing.images(
374
- keywords=query,
375
- region=region,
376
- safesearch=safesearch,
377
- max_results=max_results,
378
- )
379
- return results
380
  except Exception as e:
381
  raise HTTPException(status_code=500, detail=str(e))
382
 
383
- @app.get("/news", response_model=List[BingNewsResult])
384
- async def news_search(
385
  query: str = Query(..., description="The search keywords for news."),
386
  region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
387
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
388
  max_results: int = Query(10, description="Maximum number of news results to return."),
 
 
 
 
 
 
 
 
 
 
 
 
389
  ):
390
  """
391
- Perform a news search on Bing.
 
392
  """
393
  try:
394
- results = bing.news(
395
- keywords=query,
396
- region=region,
397
- safesearch=safesearch,
398
- max_results=max_results,
399
- )
400
- return results
401
  except Exception as e:
402
  raise HTTPException(status_code=500, detail=str(e))
403
 
 
10
  from webscout.litagent import LitAgent
11
  from bs4 import BeautifulSoup
12
  import json
13
+ import xml.etree.ElementTree as ET # Added for Google Suggestions XML parsing
14
 
15
  app = FastAPI(
16
+ title="Web Search API",
17
+ description="A FastAPI wrapper for Bing and Google search services.",
18
+ version="1.1.0",
19
  )
20
 
21
  # --- BingSearch Library Code ---
 
65
  timeout=self.timeout,
66
  impersonate=impersonate
67
  )
 
68
  self.session.headers.update({
69
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/536.36"
70
  })
71
 
 
72
  def _selectors(self, element):
73
  selectors = {
74
+ 'links': 'ol#b_results > li',
75
+ 'next': 'a.sb_pagN'
76
  }
77
  return selectors[element]
78
 
 
90
 
91
  def _get_url(self, tag):
92
  url = tag.get('href', '')
93
+ # FIX: Ensure relative URLs from Bing (like /news/search) become absolute
94
+ if url.startswith('/'):
95
+ return self._base_url + url
96
+
97
  resp = url
98
  try:
99
  parsed_url = urlparse(url)
 
110
  print(f"Error decoding Base64 string: {e}")
111
  return resp
112
 
 
113
  def text(
114
  self,
115
  keywords: str,
 
138
  html = fetch_page(current_url)
139
  soup = BeautifulSoup(html, "html.parser")
140
 
 
141
  result_blocks = soup.select(self._selectors('links'))
142
 
143
  for result in result_blocks:
 
144
  title_tag = result.find('h2')
145
  if not title_tag:
146
  continue
 
150
  continue
151
 
152
  url_val = self._get_url(link_tag)
153
+ # FIX: Use separator=' ' to prevent text from being squashed together.
154
+ title = title_tag.get_text(strip=True, separator=' ')
155
 
 
156
  desc_container = result.find('div', class_='b_caption')
157
  description = ''
158
  if desc_container:
159
+ # FIX: Use separator=' ' for descriptions as well.
160
+ description = desc_container.get_text(strip=True, separator=' ')
 
 
 
 
161
 
 
162
  if not description:
163
  p_tag = result.find('p')
164
  if p_tag:
165
+ description = p_tag.get_text(strip=True, separator=' ')
166
 
167
  if url_val and title:
168
  if unique and url_val in fetched_links:
 
177
  if len(fetched_results) >= max_results:
178
  break
179
 
 
180
  next_page_info = self._next_page(soup)
181
  current_url = next_page_info['url']
182
  if current_url:
 
184
 
185
  return fetched_results[:max_results]
186
 
 
187
  def suggestions(self, query: str, region: str = None) -> List[str]:
188
  if not query:
189
  raise ValueError("Search query cannot be empty")
 
200
  return data[1]
201
  return []
202
  except Exception as e:
203
+ raise Exception(f"Bing suggestions failed: {str(e)}")
 
 
 
204
 
205
  def images(
206
  self,
 
211
  ) -> List[BingImageResult]:
212
  if not keywords:
213
  raise ValueError("Search keywords cannot be empty")
214
+ safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
 
 
 
 
215
  safe = safe_map.get(safesearch.lower(), "Moderate")
216
+ params = {"q": keywords, "count": max_results, "setlang": self.lang, "safeSearch": safe}
 
 
 
 
 
217
  if region:
218
  params["mkt"] = region
219
  url = f"{self._base_url}/images/search?{urlencode(params)}"
 
222
  resp.raise_for_status()
223
  html = resp.text
224
  except Exception as e:
225
+ raise Exception(f"Bing image search failed: {str(e)}")
 
 
 
226
  soup = BeautifulSoup(html, "html.parser")
227
  results = []
228
  for item in soup.select("a.iusc"):
 
251
  ) -> List['BingNewsResult']:
252
  if not keywords:
253
  raise ValueError("Search keywords cannot be empty")
254
+ safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
 
 
 
 
255
  safe = safe_map.get(safesearch.lower(), "Moderate")
256
+ params = {"q": keywords, "form": "QBNH", "safeSearch": safe}
 
 
 
 
257
  if region:
258
  params["mkt"] = region
259
  url = f"{self._base_url}/news/search?{urlencode(params)}"
 
261
  resp = self.session.get(url)
262
  resp.raise_for_status()
263
  except Exception as e:
264
+ raise Exception(f"Bing news search failed: {str(e)}")
 
 
 
265
  soup = BeautifulSoup(resp.text, "html.parser")
266
  results = []
267
  for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
268
  a_tag = item.find("a")
269
+ title = a_tag.get_text(strip=True, separator=' ') if a_tag else ''
270
  url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
271
  desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
272
+ description = desc_tag.get_text(strip=True, separator=' ') if desc_tag else ''
273
  source_tag = item.find("div", class_="source")
274
+ source = source_tag.get_text(strip=True, separator=' ') if source_tag else ''
275
  if url_val and title:
276
  results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
277
  if len(results) >= max_results:
278
  break
279
  if not results:
280
  for item in soup.select("a.title"):
281
+ title = item.get_text(strip=True, separator=' ')
282
  url_val = item['href'] if item.has_attr('href') else ''
283
  description = ''
284
  source = ''
 
288
  break
289
  return results[:max_results]
290
 
291
+ # --- NEW: Google Suggestions Class ---
292
+ class GoogleSuggestions:
293
+ """Fetches search suggestions from Google's unofficial suggestions API."""
294
+ def __init__(self, timeout: int = 10, proxies: Optional[Dict[str, str]] = None):
295
+ self.session = Session(
296
+ proxies=proxies if proxies else {},
297
+ timeout=timeout,
298
+ impersonate="chrome110"
299
+ )
300
+ self.session.headers.update({
301
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
302
+ })
303
+
304
+ def get(self, query: str, lang: str = "en") -> List[str]:
305
+ if not query:
306
+ raise ValueError("Search query cannot be empty")
307
+
308
+ params = {"q": query, "hl": lang, "output": "toolbar"}
309
+ url = f"https://clients1.google.com/complete/search?{urlencode(params)}"
310
+
311
+ try:
312
+ response = self.session.get(url)
313
+ response.raise_for_status()
314
+
315
+ suggestions = []
316
+ root = ET.fromstring(response.content)
317
+ for suggestion_node in root.findall('CompleteSuggestion/suggestion'):
318
+ data = suggestion_node.get('data')
319
+ if data:
320
+ suggestions.append(data)
321
+ return suggestions
322
+ except Exception as e:
323
+ raise Exception(f"Google suggestions failed: {str(e)}")
324
+
325
+ # --- FastAPI Endpoints ---
326
 
327
  bing = BingSearch()
328
+ google = GoogleSuggestions()
329
 
330
+ @app.get("/bing/search", response_model=List[BingSearchResult], tags=["Bing"])
331
+ async def bing_text_search(
332
  query: str = Query(..., description="The search keywords."),
333
  region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
334
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
335
  max_results: int = Query(10, description="Maximum number of results to return."),
336
  ):
337
+ """Perform a text search on Bing."""
 
 
338
  try:
339
+ return bing.text(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
 
 
 
 
 
 
340
  except Exception as e:
341
  raise HTTPException(status_code=500, detail=str(e))
342
 
343
+ @app.get("/bing/suggestions", response_model=List[str], tags=["Bing"])
344
+ async def bing_get_suggestions(
345
  query: str = Query(..., description="The search query for which to fetch suggestions."),
346
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
347
  ):
348
+ """Fetches search suggestions for a given query from Bing."""
 
 
349
  try:
350
+ return bing.suggestions(query=query, region=region)
 
351
  except Exception as e:
352
  raise HTTPException(status_code=500, detail=str(e))
353
 
354
+ @app.get("/bing/images", response_model=List[BingImageResult], tags=["Bing"])
355
+ async def bing_image_search(
356
  query: str = Query(..., description="The search keywords for images."),
357
  region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
358
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
359
  max_results: int = Query(10, description="Maximum number of image results to return."),
360
  ):
361
+ """Perform an image search on Bing."""
 
 
362
  try:
363
+ return bing.images(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
 
 
 
 
 
 
364
  except Exception as e:
365
  raise HTTPException(status_code=500, detail=str(e))
366
 
367
+ @app.get("/bing/news", response_model=List[BingNewsResult], tags=["Bing"])
368
+ async def bing_news_search(
369
  query: str = Query(..., description="The search keywords for news."),
370
  region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
371
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
372
  max_results: int = Query(10, description="Maximum number of news results to return."),
373
+ ):
374
+ """Perform a news search on Bing."""
375
+ try:
376
+ return bing.news(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
377
+ except Exception as e:
378
+ raise HTTPException(status_code=500, detail=str(e))
379
+
380
+ # --- NEW: Google Suggestions Endpoint ---
381
+ @app.get("/google/suggestions", response_model=List[str], tags=["Google"])
382
+ async def google_get_suggestions(
383
+ query: str = Query(..., description="The search query for which to fetch suggestions."),
384
+ lang: str = Query("en", description="The language for the suggestions (e.g., 'en', 'es')."),
385
  ):
386
  """
387
+ Fetches search suggestions for a given query from Google.
388
+ This replicates the functionality of the provided C++ code.
389
  """
390
  try:
391
+ return google.get(query=query, lang=lang)
 
 
 
 
 
 
392
  except Exception as e:
393
  raise HTTPException(status_code=500, detail=str(e))
394