| import json |
| import requests |
| import re |
| import urllib.parse |
|
|
def _extractBingImages(html):
    """Collect image entries from the ``mediaurl``/``expw``/``exph`` fields in *html*.

    Each regex hit yields a still percent-encoded URL plus the digits that
    follow ``expw=`` and ``exph=``.  Only hits whose encoded URL ends in
    ``.jpg``, ``.png`` or ``.jpeg`` (case-sensitive) are kept.

    Args:
        html: Text to scan (presumably a Bing image-search results page).

    Returns:
        A list of ``{'url': str, 'width': int, 'height': int}`` dicts in the
        order the matches appear; ``url`` is percent-decoded.
    """
    accepted_suffixes = ('.jpg', '.png', '.jpeg')
    hits = re.findall(r'mediaurl=(.*?)&.*?expw=(\d+).*?exph=(\d+)', html)
    # The suffix test runs on the raw (encoded) URL, before decoding.
    return [
        {'url': urllib.parse.unquote(raw_url), 'width': int(w), 'height': int(h)}
        for raw_url, w, h in hits
        if raw_url.endswith(accepted_suffixes)
    ]
|
|
|
|
def _extractGoogleImages(html):
    """Extract image entries from the ``AF_initDataCallback`` ``ds:1`` payload.

    The function looks for the ``AF_initDataCallback({key: 'ds:1', ...})``
    call in *html*, parses its ``data`` argument as JSON and walks the fixed
    path ``[56][1][0][0][1][0]`` to reach the list of result entries.  For
    each entry it takes the first value of the dict at ``entry[0][0]`` and
    appends that value's ``[1][3]`` element to the result.

    Entries that do not have this shape are skipped (best effort).

    Args:
        html: Text to scan (presumably a Google Images results page).

    Returns:
        A list with one extracted element per well-formed entry, or an empty
        list when the callback is not present.
        NOTE(review): the exact shape of each element is not visible here;
        verify against callers.

    Raises:
        json.JSONDecodeError: If the captured ``data`` text is not valid JSON.
        IndexError, KeyError, TypeError: If the parsed payload does not
            contain the ``[56][1][0][0][1][0]`` path.
    """
    images = []
    match = re.search(
        r"AF_initDataCallback\({key: 'ds:1', hash: '2', data:(.*?), sideChannel: {}}\);",
        html,
    )
    if not match:
        return images

    payload = json.loads(match.group(1))
    for entry in payload[56][1][0][0][1][0]:
        try:
            first_value = list(entry[0][0].values())[0]
            images.append(first_value[1][3])
        except (AttributeError, IndexError, KeyError, TypeError):
            # Malformed or unexpected entry: skip it, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
            continue
    return images
|
|
|
|
def getBingImages(query, retries=5, timeout=10):
    """Search Bing Images for *query* and return the images found.

    The results page is requested up to *retries* times; the first attempt
    that yields at least one image wins.

    Args:
        query: Raw (not pre-encoded) search text.
        retries: Maximum number of requests to make before giving up.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The non-empty list produced by ``_extractBingImages``: dicts with
        ``'url'``, ``'width'`` and ``'height'`` keys.

    Raises:
        Exception: If a response has a non-200 status code, or if no attempt
            produced any image.
    """
    # quote_plus turns spaces into "+" (as before) and additionally escapes
    # characters such as "&", "#" or "+" that would otherwise corrupt the
    # query string.
    encoded_query = urllib.parse.quote_plus(query)
    search_url = f"https://www.bing.com/images/search?q={encoded_query}&first=1"

    # A bounded loop: the previous ``while`` never advanced its counter, so a
    # 200 response without images retried forever.
    for _ in range(retries):
        response = requests.get(search_url, timeout=timeout)
        if response.status_code != 200:
            print("Error While making bing image searches", response.text)
            raise Exception("Error While making bing image searches")
        images = _extractBingImages(response.text)
        if images:
            return images
    raise Exception("Error While making bing image searches")
|
|
|
|