Husnain Rasheed committed on
Commit
5119d9b
·
verified ·
1 Parent(s): 642dd91

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +101 -92
main.py CHANGED
@@ -10,12 +10,11 @@ from concurrent.futures import ThreadPoolExecutor
10
  from webscout.litagent import LitAgent
11
  from bs4 import BeautifulSoup
12
  import json
13
- import xml.etree.ElementTree as ET # Added for Google Suggestions XML parsing
14
 
15
  app = FastAPI(
16
- title="Web Search API",
17
- description="A FastAPI wrapper for Bing and Google search services.",
18
- version="1.1.0",
19
  )
20
 
21
  # --- BingSearch Library Code ---
@@ -65,14 +64,16 @@ class BingSearch:
65
  timeout=self.timeout,
66
  impersonate=impersonate
67
  )
 
68
  self.session.headers.update({
69
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/536.36"
70
  })
71
 
 
72
  def _selectors(self, element):
73
  selectors = {
74
- 'links': 'ol#b_results > li',
75
- 'next': 'a.sb_pagN'
76
  }
77
  return selectors[element]
78
 
@@ -90,10 +91,6 @@ class BingSearch:
90
 
91
  def _get_url(self, tag):
92
  url = tag.get('href', '')
93
- # FIX: Ensure relative URLs from Bing (like /news/search) become absolute
94
- if url.startswith('/'):
95
- return self._base_url + url
96
-
97
  resp = url
98
  try:
99
  parsed_url = urlparse(url)
@@ -110,6 +107,7 @@ class BingSearch:
110
  print(f"Error decoding Base64 string: {e}")
111
  return resp
112
 
 
113
  def text(
114
  self,
115
  keywords: str,
@@ -138,9 +136,11 @@ class BingSearch:
138
  html = fetch_page(current_url)
139
  soup = BeautifulSoup(html, "html.parser")
140
 
 
141
  result_blocks = soup.select(self._selectors('links'))
142
 
143
  for result in result_blocks:
 
144
  title_tag = result.find('h2')
145
  if not title_tag:
146
  continue
@@ -150,19 +150,24 @@ class BingSearch:
150
  continue
151
 
152
  url_val = self._get_url(link_tag)
153
- # FIX: Use separator=' ' to prevent text from being squashed together.
154
- title = title_tag.get_text(strip=True, separator=' ')
155
 
 
156
  desc_container = result.find('div', class_='b_caption')
157
  description = ''
158
  if desc_container:
159
- # FIX: Use separator=' ' for descriptions as well.
160
- description = desc_container.get_text(strip=True, separator=' ')
 
 
 
 
161
 
 
162
  if not description:
163
  p_tag = result.find('p')
164
  if p_tag:
165
- description = p_tag.get_text(strip=True, separator=' ')
166
 
167
  if url_val and title:
168
  if unique and url_val in fetched_links:
@@ -177,6 +182,7 @@ class BingSearch:
177
  if len(fetched_results) >= max_results:
178
  break
179
 
 
180
  next_page_info = self._next_page(soup)
181
  current_url = next_page_info['url']
182
  if current_url:
@@ -184,6 +190,7 @@ class BingSearch:
184
 
185
  return fetched_results[:max_results]
186
 
 
187
  def suggestions(self, query: str, region: str = None) -> List[str]:
188
  if not query:
189
  raise ValueError("Search query cannot be empty")
@@ -200,7 +207,10 @@ class BingSearch:
200
  return data[1]
201
  return []
202
  except Exception as e:
203
- raise Exception(f"Bing suggestions failed: {str(e)}")
 
 
 
204
 
205
  def images(
206
  self,
@@ -211,9 +221,18 @@ class BingSearch:
211
  ) -> List[BingImageResult]:
212
  if not keywords:
213
  raise ValueError("Search keywords cannot be empty")
214
- safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
 
 
 
 
215
  safe = safe_map.get(safesearch.lower(), "Moderate")
216
- params = {"q": keywords, "count": max_results, "setlang": self.lang, "safeSearch": safe}
 
 
 
 
 
217
  if region:
218
  params["mkt"] = region
219
  url = f"{self._base_url}/images/search?{urlencode(params)}"
@@ -222,7 +241,10 @@ class BingSearch:
222
  resp.raise_for_status()
223
  html = resp.text
224
  except Exception as e:
225
- raise Exception(f"Bing image search failed: {str(e)}")
 
 
 
226
  soup = BeautifulSoup(html, "html.parser")
227
  results = []
228
  for item in soup.select("a.iusc"):
@@ -251,9 +273,17 @@ class BingSearch:
251
  ) -> List['BingNewsResult']:
252
  if not keywords:
253
  raise ValueError("Search keywords cannot be empty")
254
- safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
 
 
 
 
255
  safe = safe_map.get(safesearch.lower(), "Moderate")
256
- params = {"q": keywords, "form": "QBNH", "safeSearch": safe}
 
 
 
 
257
  if region:
258
  params["mkt"] = region
259
  url = f"{self._base_url}/news/search?{urlencode(params)}"
@@ -261,24 +291,27 @@ class BingSearch:
261
  resp = self.session.get(url)
262
  resp.raise_for_status()
263
  except Exception as e:
264
- raise Exception(f"Bing news search failed: {str(e)}")
 
 
 
265
  soup = BeautifulSoup(resp.text, "html.parser")
266
  results = []
267
  for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
268
  a_tag = item.find("a")
269
- title = a_tag.get_text(strip=True, separator=' ') if a_tag else ''
270
  url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
271
  desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
272
- description = desc_tag.get_text(strip=True, separator=' ') if desc_tag else ''
273
  source_tag = item.find("div", class_="source")
274
- source = source_tag.get_text(strip=True, separator=' ') if source_tag else ''
275
  if url_val and title:
276
  results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
277
  if len(results) >= max_results:
278
  break
279
  if not results:
280
  for item in soup.select("a.title"):
281
- title = item.get_text(strip=True, separator=' ')
282
  url_val = item['href'] if item.has_attr('href') else ''
283
  description = ''
284
  source = ''
@@ -288,107 +321,83 @@ class BingSearch:
288
  break
289
  return results[:max_results]
290
 
291
- # --- NEW: Google Suggestions Class ---
292
- class GoogleSuggestions:
293
- """Fetches search suggestions from Google's unofficial suggestions API."""
294
- def __init__(self, timeout: int = 10, proxies: Optional[Dict[str, str]] = None):
295
- self.session = Session(
296
- proxies=proxies if proxies else {},
297
- timeout=timeout,
298
- impersonate="chrome110"
299
- )
300
- self.session.headers.update({
301
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
302
- })
303
-
304
- def get(self, query: str, lang: str = "en") -> List[str]:
305
- if not query:
306
- raise ValueError("Search query cannot be empty")
307
-
308
- params = {"q": query, "hl": lang, "output": "toolbar"}
309
- url = f"https://clients1.google.com/complete/search?{urlencode(params)}"
310
-
311
- try:
312
- response = self.session.get(url)
313
- response.raise_for_status()
314
-
315
- suggestions = []
316
- root = ET.fromstring(response.content)
317
- for suggestion_node in root.findall('CompleteSuggestion/suggestion'):
318
- data = suggestion_node.get('data')
319
- if data:
320
- suggestions.append(data)
321
- return suggestions
322
- except Exception as e:
323
- raise Exception(f"Google suggestions failed: {str(e)}")
324
-
325
- # --- FastAPI Endpoints ---
326
 
327
  bing = BingSearch()
328
- google = GoogleSuggestions()
329
 
330
- @app.get("/bing/search", response_model=List[BingSearchResult], tags=["Bing"])
331
- async def bing_text_search(
332
  query: str = Query(..., description="The search keywords."),
333
  region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
334
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
335
  max_results: int = Query(10, description="Maximum number of results to return."),
336
  ):
337
- """Perform a text search on Bing."""
 
 
338
  try:
339
- return bing.text(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
 
 
 
 
 
 
340
  except Exception as e:
341
  raise HTTPException(status_code=500, detail=str(e))
342
 
343
- @app.get("/bing/suggestions", response_model=List[str], tags=["Bing"])
344
- async def bing_get_suggestions(
345
  query: str = Query(..., description="The search query for which to fetch suggestions."),
346
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
347
  ):
348
- """Fetches search suggestions for a given query from Bing."""
 
 
349
  try:
350
- return bing.suggestions(query=query, region=region)
 
351
  except Exception as e:
352
  raise HTTPException(status_code=500, detail=str(e))
353
 
354
- @app.get("/bing/images", response_model=List[BingImageResult], tags=["Bing"])
355
- async def bing_image_search(
356
  query: str = Query(..., description="The search keywords for images."),
357
  region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
358
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
359
  max_results: int = Query(10, description="Maximum number of image results to return."),
360
  ):
361
- """Perform an image search on Bing."""
 
 
362
  try:
363
- return bing.images(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
 
 
 
 
 
 
364
  except Exception as e:
365
  raise HTTPException(status_code=500, detail=str(e))
366
 
367
- @app.get("/bing/news", response_model=List[BingNewsResult], tags=["Bing"])
368
- async def bing_news_search(
369
  query: str = Query(..., description="The search keywords for news."),
370
  region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
371
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
372
  max_results: int = Query(10, description="Maximum number of news results to return."),
373
- ):
374
- """Perform a news search on Bing."""
375
- try:
376
- return bing.news(keywords=query, region=region, safesearch=safesearch, max_results=max_results)
377
- except Exception as e:
378
- raise HTTPException(status_code=500, detail=str(e))
379
-
380
- # --- NEW: Google Suggestions Endpoint ---
381
- @app.get("/google/suggestions", response_model=List[str], tags=["Google"])
382
- async def google_get_suggestions(
383
- query: str = Query(..., description="The search query for which to fetch suggestions."),
384
- lang: str = Query("en", description="The language for the suggestions (e.g., 'en', 'es')."),
385
  ):
386
  """
387
- Fetches search suggestions for a given query from Google.
388
- This replicates the functionality of the provided C++ code.
389
  """
390
  try:
391
- return google.get(query=query, lang=lang)
 
 
 
 
 
 
392
  except Exception as e:
393
  raise HTTPException(status_code=500, detail=str(e))
394
 
 
10
  from webscout.litagent import LitAgent
11
  from bs4 import BeautifulSoup
12
  import json
 
13
 
14
  app = FastAPI(
15
+ title="Bing Search API",
16
+ description="A FastAPI wrapper for the BingSearch library with advanced features.",
17
+ version="1.0.0",
18
  )
19
 
20
  # --- BingSearch Library Code ---
 
64
  timeout=self.timeout,
65
  impersonate=impersonate
66
  )
67
+ # It's good practice to set a realistic User-Agent
68
  self.session.headers.update({
69
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
70
  })
71
 
72
+ # FIX: Updated selectors to be more robust against Bing UI changes.
73
  def _selectors(self, element):
74
  selectors = {
75
+ 'links': 'ol#b_results > li', # More generic selector for any list item in results
76
+ 'next': 'a.sb_pagN' # Selector for the "Next" page button
77
  }
78
  return selectors[element]
79
 
 
91
 
92
  def _get_url(self, tag):
93
  url = tag.get('href', '')
 
 
 
 
94
  resp = url
95
  try:
96
  parsed_url = urlparse(url)
 
107
  print(f"Error decoding Base64 string: {e}")
108
  return resp
109
 
110
+ # FIX: The entire text parsing logic is updated to handle modern Bing HTML structure.
111
  def text(
112
  self,
113
  keywords: str,
 
136
  html = fetch_page(current_url)
137
  soup = BeautifulSoup(html, "html.parser")
138
 
139
+ # Use the more generic selector for result blocks
140
  result_blocks = soup.select(self._selectors('links'))
141
 
142
  for result in result_blocks:
143
+ # Find the title and link, which are usually in an <h2> tag
144
  title_tag = result.find('h2')
145
  if not title_tag:
146
  continue
 
150
  continue
151
 
152
  url_val = self._get_url(link_tag)
153
+ title = title_tag.get_text(strip=True)
 
154
 
155
+ # Find the description, often in a div with class 'b_caption'
156
  desc_container = result.find('div', class_='b_caption')
157
  description = ''
158
  if desc_container:
159
+ # Find the paragraph within the caption, or use the whole caption text
160
+ desc_p = desc_container.find('p')
161
+ if desc_p:
162
+ description = desc_p.get_text(strip=True)
163
+ else:
164
+ description = desc_container.get_text(strip=True)
165
 
166
+ # Fallback if no 'b_caption' is found
167
  if not description:
168
  p_tag = result.find('p')
169
  if p_tag:
170
+ description = p_tag.get_text(strip=True)
171
 
172
  if url_val and title:
173
  if unique and url_val in fetched_links:
 
182
  if len(fetched_results) >= max_results:
183
  break
184
 
185
+ # Find the next page URL
186
  next_page_info = self._next_page(soup)
187
  current_url = next_page_info['url']
188
  if current_url:
 
190
 
191
  return fetched_results[:max_results]
192
 
193
+
194
  def suggestions(self, query: str, region: str = None) -> List[str]:
195
  if not query:
196
  raise ValueError("Search query cannot be empty")
 
207
  return data[1]
208
  return []
209
  except Exception as e:
210
+ if hasattr(e, 'response') and e.response is not None:
211
+ raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
212
+ else:
213
+ raise Exception(f"Bing suggestions failed: {str(e)}")
214
 
215
  def images(
216
  self,
 
221
  ) -> List[BingImageResult]:
222
  if not keywords:
223
  raise ValueError("Search keywords cannot be empty")
224
+ safe_map = {
225
+ "on": "Strict",
226
+ "moderate": "Moderate",
227
+ "off": "Off"
228
+ }
229
  safe = safe_map.get(safesearch.lower(), "Moderate")
230
+ params = {
231
+ "q": keywords,
232
+ "count": max_results,
233
+ "setlang": self.lang,
234
+ "safeSearch": safe,
235
+ }
236
  if region:
237
  params["mkt"] = region
238
  url = f"{self._base_url}/images/search?{urlencode(params)}"
 
241
  resp.raise_for_status()
242
  html = resp.text
243
  except Exception as e:
244
+ if hasattr(e, 'response') and e.response is not None:
245
+ raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
246
+ else:
247
+ raise Exception(f"Bing image search failed: {str(e)}")
248
  soup = BeautifulSoup(html, "html.parser")
249
  results = []
250
  for item in soup.select("a.iusc"):
 
273
  ) -> List['BingNewsResult']:
274
  if not keywords:
275
  raise ValueError("Search keywords cannot be empty")
276
+ safe_map = {
277
+ "on": "Strict",
278
+ "moderate": "Moderate",
279
+ "off": "Off"
280
+ }
281
  safe = safe_map.get(safesearch.lower(), "Moderate")
282
+ params = {
283
+ "q": keywords,
284
+ "form": "QBNH",
285
+ "safeSearch": safe,
286
+ }
287
  if region:
288
  params["mkt"] = region
289
  url = f"{self._base_url}/news/search?{urlencode(params)}"
 
291
  resp = self.session.get(url)
292
  resp.raise_for_status()
293
  except Exception as e:
294
+ if hasattr(e, 'response') and e.response is not None:
295
+ raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
296
+ else:
297
+ raise Exception(f"Bing news search failed: {str(e)}")
298
  soup = BeautifulSoup(resp.text, "html.parser")
299
  results = []
300
  for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
301
  a_tag = item.find("a")
302
+ title = a_tag.get_text(strip=True) if a_tag else ''
303
  url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
304
  desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
305
+ description = desc_tag.get_text(strip=True) if desc_tag else ''
306
  source_tag = item.find("div", class_="source")
307
+ source = source_tag.get_text(strip=True) if source_tag else ''
308
  if url_val and title:
309
  results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
310
  if len(results) >= max_results:
311
  break
312
  if not results:
313
  for item in soup.select("a.title"):
314
+ title = item.get_text(strip=True)
315
  url_val = item['href'] if item.has_attr('href') else ''
316
  description = ''
317
  source = ''
 
321
  break
322
  return results[:max_results]
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  bing = BingSearch()
 
326
 
327
+ @app.get("/search", response_model=List[BingSearchResult])
328
+ async def text_search(
329
  query: str = Query(..., description="The search keywords."),
330
  region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
331
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
332
  max_results: int = Query(10, description="Maximum number of results to return."),
333
  ):
334
+ """
335
+ Perform a text search on Bing.
336
+ """
337
  try:
338
+ results = bing.text(
339
+ keywords=query,
340
+ region=region,
341
+ safesearch=safesearch,
342
+ max_results=max_results,
343
+ )
344
+ return results
345
  except Exception as e:
346
  raise HTTPException(status_code=500, detail=str(e))
347
 
348
+ @app.get("/suggestions", response_model=List[str])
349
+ async def get_suggestions(
350
  query: str = Query(..., description="The search query for which to fetch suggestions."),
351
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
352
  ):
353
+ """
354
+ Fetches search suggestions for a given query.
355
+ """
356
  try:
357
+ suggestions = bing.suggestions(query=query, region=region)
358
+ return suggestions
359
  except Exception as e:
360
  raise HTTPException(status_code=500, detail=str(e))
361
 
362
+ @app.get("/images", response_model=List[BingImageResult])
363
+ async def image_search(
364
  query: str = Query(..., description="The search keywords for images."),
365
  region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
366
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
367
  max_results: int = Query(10, description="Maximum number of image results to return."),
368
  ):
369
+ """
370
+ Perform an image search on Bing.
371
+ """
372
  try:
373
+ results = bing.images(
374
+ keywords=query,
375
+ region=region,
376
+ safesearch=safesearch,
377
+ max_results=max_results,
378
+ )
379
+ return results
380
  except Exception as e:
381
  raise HTTPException(status_code=500, detail=str(e))
382
 
383
+ @app.get("/news", response_model=List[BingNewsResult])
384
+ async def news_search(
385
  query: str = Query(..., description="The search keywords for news."),
386
  region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
387
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
388
  max_results: int = Query(10, description="Maximum number of news results to return."),
 
 
 
 
 
 
 
 
 
 
 
 
389
  ):
390
  """
391
+ Perform a news search on Bing.
 
392
  """
393
  try:
394
+ results = bing.news(
395
+ keywords=query,
396
+ region=region,
397
+ safesearch=safesearch,
398
+ max_results=max_results,
399
+ )
400
+ return results
401
  except Exception as e:
402
  raise HTTPException(status_code=500, detail=str(e))
403