Kyo-Kai committed on
Commit
f55c01c
·
verified ·
1 Parent(s): e2d9f8d

Update sites/pixiv.py

Browse files
Files changed (1) hide show
  1. sites/pixiv.py +132 -136
sites/pixiv.py CHANGED
@@ -1,20 +1,25 @@
1
- import os
2
- import re
3
  import sys
4
- import time
5
 
 
 
 
 
6
  from selenium.webdriver.common.action_chains import ActionChains
7
  from selenium.webdriver.common.by import By
8
- from random import randint
 
 
 
9
  from datetime import date, datetime
10
- from commands.driver_instance import create_url_headers, tab_handler, download_file
 
11
  from commands.exec_path import imgList
12
- from commands.universal import *
13
  from ai.classifying_ai import img_classifier
14
 
15
- sys.path.append("..")
16
 
17
- async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,searchTypes,viewRestriction,imageControl,
18
  n_likes,n_bookmarks,n_views, start_date=0,end_date=0, user_name=0, pass_word=0):
19
  global image_locations, image_names, ultimatium, ai_mode, prev_search
20
  image_names = imgList(mode=1)
@@ -22,7 +27,6 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
22
  prev_search = 0
23
  link = "https://www.pixiv.net/tags/illustration"
24
  success_login = False
25
- is_search_continued = 0
26
 
27
  filters = {
28
  "likes": 0 if not n_likes else n_likes,
@@ -35,42 +39,38 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
35
  end_date = date.today() if not date_handler(end_date) else end_date
36
 
37
  if 1 in imageControl:
38
- is_search_continued = continue_Search(driver, link, mode=0)
39
  else:
40
  driver.get(link)
41
 
42
  # Will use those when not logged in
 
 
 
43
  search_param = {
44
- "bar_search": generate_xpath_query("//input", "@placeholder", "search works"),
45
- "li_search": generate_xpath_query("//h3", "text()", "works", "illustrations and manga", "illustrations") + "/ancestor::section[1]/div[2]//li",
46
- "premium_search": generate_xpath_query("//h3", 'text()', 'popular works') + "/ancestor::section[1]/div[2]//li",
47
  }
48
 
49
  # Check if logged in otherwise log in with credentials
50
  try:
51
- # Check for create an account (only appears for non-logged in users). Must wait page.
52
- contains_works(driver, search_param["bar_search"])
53
- time.sleep(1)
54
- logged_out_button = driver.find_elements(By.XPATH, case_insensitive_xpath_contains("//a", 'Create an account'))
55
-
56
- if not logged_out_button:
57
  success_login = True
58
- elif user_name and pass_word:
59
- print("Logging in...")
60
- if login_handler(driver, exec_path, user_name, pass_word):
61
- success_login = True
62
 
63
  if not success_login:
64
  print("Failed! You are not logged in...")
65
 
66
- except Exception as e:
67
- print(f"Failed! You are not logged in... Exception: {e}")
 
68
 
69
- if not success_login:
70
- is_lang_en(driver)
71
-
72
- if not is_search_continued:
73
- searchQuery(user_search, driver, search_param["bar_search"], mode=0)
74
  time.sleep(2)
75
 
76
  if start_date and not success_login:
@@ -81,18 +81,20 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
81
  driver.get(cur_url[0] + f"?scd={start_date}&ecd={end_date}&" + cur_url[1])
82
  time.sleep(2)
83
 
 
 
 
 
84
  ultimatium = 1 if 0 in imageControl else 0
85
- ai_mode = 1 if 3 in imageControl else 0
 
86
 
87
  if not contains_works(driver, search_param["li_search"]):
88
  print("No works found...")
89
  return []
90
 
91
- # Search for images in the premium section
92
- premium_pics = 0
93
- if 0 in searchTypes:
94
- await search_image(driver, exec_path, filters, search_param)
95
- premium_pics = len(image_locations)
96
 
97
  # Switch to english
98
  try:
@@ -101,10 +103,9 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
101
  except:
102
  pass
103
 
104
-
105
  # Apply filters if logged in
106
- try:
107
- if success_login:
108
  driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div/div[3]/div/div[5]/nav/a[2]").click()
109
  print("Illustrations only")
110
  time.sleep(1)
@@ -112,33 +113,39 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
112
  mode = ""
113
  order = ""
114
 
115
- if 0 in viewRestriction and 1 in viewRestriction:
116
  print("PG Friendly and r-18")
117
- elif 0 in viewRestriction:
118
  mode = "mode=safe&"
119
  print("PG Friendly")
120
- elif 1 in viewRestriction:
121
  mode = "mode=r18&"
122
  print("r-18")
123
- if 2 in imageControl:
124
  order = "order=date&"
125
  print("Order by oldest")
126
 
127
  cur_url = driver.current_url.split("?")
128
  driver.get(cur_url[0] + f"?{order}{mode}" + cur_url[1])
129
- else:
130
- if contains_works(driver, case_insensitive_xpath_contains("//div", 'Show all')):
131
- driver.find_element(By.XPATH, case_insensitive_xpath_contains("//div", 'Show all')).click()
132
- except Exception as e:
133
- print(f"Failed to apply filters or show all works... Exception: {e}")
 
 
 
 
 
 
134
 
135
  prev_search = len(image_locations)
136
  curr_page = driver.current_url
137
 
138
- if 1 in searchTypes or searchTypes == []:
139
- while len(image_locations) - premium_pics < num_pics*num_pages:
140
- await search_image(driver,exec_path,filters,search_param=search_param,searchLimit=searchLimit)
141
- if len(image_locations) < num_pics*num_pages and not valid_page(driver, ['XPATH', 'XPATH'], ['//*[@class="sc-xhhh7v-0 kYtoqc"]', './/a']):
142
  print("Reached end of search results")
143
  break
144
  driver.quit()
@@ -146,7 +153,7 @@ async def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,
146
  return image_locations
147
 
148
 
149
- async def search_image(driver,exec_path,filters,search_param,searchLimit={"pagecount": 1, "imagecount": 99}):
150
  # Searches using premium or freemium
151
  search_type = awaitPageLoad(driver=driver,searchLimit=searchLimit,search_param=search_param)
152
  if search_type == -1:
@@ -154,28 +161,31 @@ async def search_image(driver,exec_path,filters,search_param,searchLimit={"pagec
154
 
155
  # The main image searcher
156
  for page in range(searchLimit["pagecount"]):
157
- temp_img_len = len(image_locations)
158
- contains_works(driver, search_param["li_search"] + "//div//a")
 
 
 
159
  images = search_image_type(search_type, driver, search_param=search_param)
160
 
161
  for image in images:
 
162
  if len(image_locations) - prev_search >= searchLimit["imagecount"]*searchLimit["pagecount"] or len(image_locations) - temp_img_len >= searchLimit["imagecount"]:
163
  break
164
-
165
- image = image.find_element(By.XPATH, ".//div//a")
166
  imageLink = image.find_elements(By.XPATH, ".//img")
167
 
168
  if image.get_attribute("href").rsplit("/", 1)[-1] not in image_names:
169
- if ai_mode == 1 and await process_ai_mode(imageLink, image, driver, exec_path):
170
- continue # Skip the image if AI mode is on and it is not approved
171
 
172
  try:
173
  if sum(filters.values()) == 0 and len(imageLink): # Dl the image directly from the grid
174
- await thumbnailDownloader(imageLink=imageLink, image=image, driver=driver, exec_path=exec_path)
175
 
176
  else: # Dl the image from the image page (opens a new tab)
177
  driver, tempImg = tab_handler(driver=driver, image=image)
178
- contains_works(driver, "//div[@role='presentation']")
179
  tempDL = driver.find_element(By.XPATH, "//div[@role='presentation']//img")
180
 
181
  imagePopularity = parseImageData(filters=filters,
@@ -185,21 +195,21 @@ async def search_image(driver,exec_path,filters,search_param,searchLimit={"pagec
185
  if filterOptions(filters, imagePopularity=imagePopularity): # Check if image filters are satisfied
186
  tempDLLink = tempDL.get_attribute("src")
187
 
188
- # Dl the original resolution image
189
  if ultimatium:
190
  tempDLLink = tempDLLink.replace("img-master", "img-original"
191
  ).replace("_master1200", "")
192
 
193
- await download_image(imageLink=tempDLLink, exec_path=exec_path, driver=driver)
194
  else:
195
  print("\nImage filters not satisfied...")
196
  driver = tab_handler(driver=driver)
197
  time.sleep(0.3)
198
 
199
  # In case of stale element or any other errors
200
- except Exception as e:
201
  if driver.window_handles[-1] != driver.window_handles[0]:
202
- print(f"\nI ran into an error, moving on...{e}")
203
  driver = tab_handler(driver=driver)
204
  time.sleep(randint(1, 3) + randint(0, 9) / 10)
205
  continue
@@ -207,7 +217,7 @@ async def search_image(driver,exec_path,filters,search_param,searchLimit={"pagec
207
  else:
208
  print("\nImage already exists, moving to another image...")
209
  save_Search(driver, mode=0)
210
- if not valid_page(driver, ['XPATH', 'XPATH'], ['//*[@class="sc-xhhh7v-0 kYtoqc"]', './/a']):
211
  break
212
 
213
 
@@ -217,7 +227,9 @@ def login_handler(driver, exec_path, user_name, pass_word):
217
  login_btn = driver.find_elements(By.XPATH, "//*[@class='sc-oh3a2p-4 gHKmNu']//a")[1]
218
  login_btn.click()
219
 
220
- contains_works(driver, "//*[@class='sc-2o1uwj-0 elngKN']")
 
 
221
  user_btn = driver.find_element(
222
  By.XPATH, "//*[@class='sc-2o1uwj-0 elngKN']"
223
  ).find_elements(By.TAG_NAME, "fieldset")
@@ -229,80 +241,50 @@ def login_handler(driver, exec_path, user_name, pass_word):
229
  actions.click(user_btn[1]).send_keys(pass_word).perform()
230
 
231
  # Log in button
232
- driver.find_element(By.XPATH,"//button[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'log in')]").click()
233
 
234
  return True
235
 
236
 
237
async def download_image(imageLink, exec_path, driver, mode=1):
    """Download ``imageLink`` into ``exec_path.folder_path`` and return the saved path.

    The file name is the last path segment of ``imageLink``. If the first
    download attempt raises, the URL's extension is swapped for ``.png`` and
    the download is tried once more; a second failure propagates to the
    caller.

    Args:
        imageLink: URL of the image to fetch.
        exec_path: object exposing ``folder_path`` (destination directory).
        driver: Selenium driver, used only to build the request headers.
        mode: when truthy the download is recorded in the module-level
            ``image_locations`` / ``image_names`` lists.

    Returns:
        The local path the image was written to.
    """
    tempDLName = imageLink.rsplit("/", 1)[-1]
    img_loc = f"./{exec_path.folder_path}/{tempDLName}"
    headers = get_selenium_headers(driver)

    try:
        await download_file(imageLink, img_loc, headers)
    except Exception:
        # Retry once with a .png extension. The headers built from the
        # browser are reused: the previous code called
        # installUrlOpeners(imageLink) here, which evaluates
        # `imageLink.current_url` on a plain string (AttributeError) whenever
        # `ultimatium` is off, so the retry could never run in that mode.
        imageLink = imageLink.rsplit(".", 1)[0] + ".png"
        await download_file(imageLink, img_loc, headers)

    print(f"\n{imageLink}")
    if mode:
        # The recorded name keeps the original extension, matching img_loc.
        image_locations.append(img_loc)
        image_names.append(tempDLName.split(".")[0])
    return img_loc
257
 
258
 
259
async def thumbnailDownloader(imageLink, image, driver, exec_path, mode=1):
    """Hover the grid thumbnail, then download it via ``download_image``.

    ``imageLink`` is first passed through ``image_type`` (defined elsewhere in
    this module) together with ``mode``; the result is what gets downloaded.

    Returns:
        Whatever ``download_image`` returns for the resolved link.
    """
    imageLink = image_type(imageLink=imageLink, mode=mode)

    # Move the pointer over the thumbnail's <img> before downloading.
    hover = ActionChains(driver=driver)
    thumbnail = image.find_element(By.XPATH, ".//img")
    hover.move_to_element(thumbnail).perform()

    saved_path = await download_image(
        imageLink=imageLink, exec_path=exec_path, driver=driver, mode=mode
    )
    return saved_path
266
 
267
 
268
def is_lang_en(driver):
    """Click every anchor with ``lang="en"`` inside the language container.

    Best effort: any exception raised while locating or clicking the anchors
    is swallowed and the function returns ``None`` either way.
    """
    try:
        candidates = driver.find_elements(By.XPATH, '//*[@class="sc-93qi7v-2 hbGpVM"]//a')
        for anchor in candidates:
            if anchor.get_attribute("lang") != "en":
                continue
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", anchor)
    except Exception:
        # You are either in english or they changed the xpath
        pass
277
-
278
-
279
- ######## HEADER CONSTRUCTION ########
280
def installUrlOpeners(driver, mode=1):  # Mode 0 means its a thumbnail
    """Build request headers with ``create_url_headers`` and return them.

    When the module-level ``ultimatium`` flag and ``mode`` are both truthy,
    ``driver`` itself is handed to ``create_url_headers``; otherwise
    ``driver.current_url`` is used instead.
    """
    header_source = driver if (ultimatium and mode) else driver.current_url
    return create_url_headers(header_source)
286
-
287
def get_selenium_headers(driver):
    """Return browser-like HTTP headers for image requests.

    Three values are read from the live browser through
    ``driver.execute_script`` (user agent, client-hint brands, client-hint
    platform); every other header is a fixed string.

    NOTE(review): the two client-hint scripts dereference
    ``navigator.userAgentData``, which not every browser exposes - confirm the
    driver in use provides it.
    """
    # Queried in the same order as the headers appear below.
    user_agent = driver.execute_script("return navigator.userAgent;")
    ch_brands = driver.execute_script(
        "return navigator.userAgentData.brands.map(b => b.brand + ';v=' + b.version).join(', ');"
    )
    ch_platform = driver.execute_script("return navigator.userAgentData.platform;")

    return {
        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'en-US,en;q=0.9,it;q=0.8,ru;q=0.7,ja;q=0.6,en-GB;q=0.5',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
        'Referer': 'https://www.pixiv.net/',  # Important to include referer
        'User-Agent': user_agent,
        'Sec-CH-UA': ch_brands,
        'Sec-CH-UA-Mobile': '?0',
        'Sec-CH-UA-Platform': ch_platform,
        'Sec-Fetch-Dest': 'image',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'cross-site',
        'Connection': 'keep-alive',
        'Priority': 'i',
    }
306
 
307
 
308
  ######## HELPER FUNCTIONS (UNLIKELY TO CHANGE) ########
@@ -331,7 +313,11 @@ def awaitPageLoad(driver, searchLimit, search_param, search_type=0):
331
  # Waits on the page to load (for popular or freemium)
332
  if searchLimit["imagecount"] == 99:
333
  try:
334
- contains_works(driver, search_param["premium_search"], timeout=10)
 
 
 
 
335
  print("Premium section found, searching for images...")
336
  except:
337
  print("No popular section")
@@ -339,7 +325,9 @@ def awaitPageLoad(driver, searchLimit, search_param, search_type=0):
339
  return search_type
340
  else:
341
  try:
342
- contains_works(driver, search_param["li_search"])
 
 
343
  print("\nFreemium section found, searching for images...")
344
  except:
345
  driver.refresh()
@@ -364,6 +352,23 @@ def parseImageData(Data, filters):
364
  return parsedData
365
 
366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  def date_handler(sel_date):
368
  temp = sel_date.split("-")
369
  try:
@@ -373,10 +378,10 @@ def date_handler(sel_date):
373
  return 1
374
 
375
 
376
- async def process_ai_mode(imageLink, image, driver, exec_path):
377
  try:
378
  # Dl the image thumbnail from the grid
379
- img_loc = await thumbnailDownloader(imageLink=imageLink, image=image, driver=driver, exec_path=exec_path, mode=0)
380
 
381
  if img_classifier(img_loc):
382
  print("AI Mode: I approve this image")
@@ -386,13 +391,4 @@ async def process_ai_mode(imageLink, image, driver, exec_path):
386
  return True
387
  os.remove(img_loc)
388
  except:
389
- print("AI Mode: Skipping this image due to an error")
390
  return True
391
-
392
-
393
def case_insensitive_xpath_contains(xpath, text):
    """Append a case-insensitive ``contains(text(), ...)`` predicate to ``xpath``.

    The node text is lower-cased inside XPath with ``translate`` (ASCII
    letters only) and compared against ``text.lower()``. ``text`` is inserted
    between single quotes without escaping.
    """
    upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    lower = "abcdefghijklmnopqrstuvwxyz"
    needle = text.lower()
    lowered_node_text = f"translate(text(), '{upper}', '{lower}')"
    return f"{xpath}[contains({lowered_node_text}, '{needle}')]"
395
-
396
-
397
def generate_xpath_query(base_xpath, attribute, *args):
    """Build ``base_xpath[...]`` matching ``attribute`` against any of ``args``.

    Each value in ``args`` becomes one equality test between the lower-cased
    attribute (via XPath ``translate``, ASCII letters only) and the lower-cased
    value; the tests are joined with ``or``. With no ``args`` the predicate is
    empty (``base_xpath + "[]"``).
    """
    lowered_attr = (
        f"translate({attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"
    )
    clauses = [f"{lowered_attr} = '{value.lower()}'" for value in args]
    predicate = " or ".join(clauses)
    return base_xpath + "[" + predicate + "]"
 
 
 
1
  import sys
2
+ sys.path.append("..")
3
 
4
+ import time
5
+ import urllib.request
6
+ import os
7
+ import re
8
  from selenium.webdriver.common.action_chains import ActionChains
9
  from selenium.webdriver.common.by import By
10
+ from selenium.webdriver.common.keys import Keys
11
+ from selenium.webdriver.support.ui import WebDriverWait
12
+ from selenium.webdriver.support import expected_conditions as EC
13
+ from selenium.common.exceptions import TimeoutException
14
  from datetime import date, datetime
15
+ from random import randint
16
+ from commands.driver_instance import create_url_headers, tab_handler
17
  from commands.exec_path import imgList
18
+ from commands.universal import searchQuery, save_Search, continue_Search, contains_works
19
  from ai.classifying_ai import img_classifier
20
 
 
21
 
22
+ def getOrderedPixivImages(driver,exec_path,user_search,num_pics,num_pages,searchTypes,viewRestriction,imageControl,
23
  n_likes,n_bookmarks,n_views, start_date=0,end_date=0, user_name=0, pass_word=0):
24
  global image_locations, image_names, ultimatium, ai_mode, prev_search
25
  image_names = imgList(mode=1)
 
27
  prev_search = 0
28
  link = "https://www.pixiv.net/tags/illustration"
29
  success_login = False
 
30
 
31
  filters = {
32
  "likes": 0 if not n_likes else n_likes,
 
39
  end_date = date.today() if not date_handler(end_date) else end_date
40
 
41
  if 1 in imageControl:
42
+ continue_Search(driver, link, mode=0)
43
  else:
44
  driver.get(link)
45
 
46
  # Will use those when not logged in
47
+ bar_search = '//input[@placeholder="Search works"]'
48
+ li_search = "//h3[contains(text(), 'Works') or contains(text(), 'Illustrations and Manga') or contains(text(), 'Illustrations')]/ancestor::section[1]/div[2]//li"
49
+ premium_search = "//h3[contains(text(), 'Popular works')]/ancestor::section[1]/div[2]//li"
50
  search_param = {
51
+ "bar_search": bar_search,
52
+ "li_search": li_search,
53
+ "premium_search": premium_search,
54
  }
55
 
56
  # Check if logged in otherwise log in with credentials
57
  try:
58
+ # Explicit wait to check for favorite button (only appears for logged in users)
59
+ WebDriverWait(driver, timeout=6).until(
60
+ EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Add to your favorites')]")))
61
+
62
+ if driver.find_elements(By.XPATH, "//button[contains(text(), 'Add to your favorites')]"):
 
63
  success_login = True
 
 
 
 
64
 
65
  if not success_login:
66
  print("Failed! You are not logged in...")
67
 
68
+ except:
69
+ print("Failed! You are not logged in...")
70
+ pass
71
 
72
+ if 1 not in imageControl:
73
+ searchQuery(user_search, driver, search_param["bar_search"], isLoggedIn=success_login)
 
 
 
74
  time.sleep(2)
75
 
76
  if start_date and not success_login:
 
81
  driver.get(cur_url[0] + f"?scd={start_date}&ecd={end_date}&" + cur_url[1])
82
  time.sleep(2)
83
 
84
+ premiumSearch = 1 if 0 in searchTypes else 0
85
+ freemiumSearch = 1 if 1 in searchTypes else 0
86
+ pg_friendly = 1 if 0 in viewRestriction else 0
87
+ r_18 = 1 if 1 in viewRestriction else 0
88
  ultimatium = 1 if 0 in imageControl else 0
89
+ order_by_oldest = 1 if 2 in imageControl else 0
90
+ ai_mode = 1
91
 
92
  if not contains_works(driver, search_param["li_search"]):
93
  print("No works found...")
94
  return []
95
 
96
+ if premiumSearch == 1:
97
+ search_image(driver, exec_path, filters, search_param)
 
 
 
98
 
99
  # Switch to english
100
  try:
 
103
  except:
104
  pass
105
 
 
106
  # Apply filters if logged in
107
+ if success_login:
108
+ try:
109
  driver.find_element(By.XPATH, "/html/body/div[1]/div[2]/div/div[3]/div/div[5]/nav/a[2]").click()
110
  print("Illustrations only")
111
  time.sleep(1)
 
113
  mode = ""
114
  order = ""
115
 
116
+ if pg_friendly == 1 and r_18 == 1:
117
  print("PG Friendly and r-18")
118
+ elif pg_friendly == 1:
119
  mode = "mode=safe&"
120
  print("PG Friendly")
121
+ elif r_18 == 1:
122
  mode = "mode=r18&"
123
  print("r-18")
124
+ if order_by_oldest == 1:
125
  order = "order=date&"
126
  print("Order by oldest")
127
 
128
  cur_url = driver.current_url.split("?")
129
  driver.get(cur_url[0] + f"?{order}{mode}" + cur_url[1])
130
+ except:
131
+ pass
132
+
133
+ # Click show all results
134
+ try:
135
+ time.sleep(1)
136
+ show_all_div = driver.find_element(By.XPATH, "//div[contains(text(), 'Show all')]")
137
+ if show_all_div:
138
+ driver.find_element(By.XPATH, '//*[@class="sc-d98f2c-0 sc-s46o24-1 dAXqaU"]').click()
139
+ except:
140
+ pass
141
 
142
  prev_search = len(image_locations)
143
  curr_page = driver.current_url
144
 
145
+ if freemiumSearch:
146
+ while len(image_locations) < num_pics*num_pages:
147
+ search_image(driver,exec_path,filters,search_param=search_param,searchLimit=searchLimit)
148
+ if not valid_page(driver) and len(image_locations) < num_pics*num_pages:
149
  print("Reached end of search results")
150
  break
151
  driver.quit()
 
153
  return image_locations
154
 
155
 
156
+ def search_image(driver,exec_path,filters,search_param,searchLimit={"pagecount": 1, "imagecount": 99}):
157
  # Searches using premium or freemium
158
  search_type = awaitPageLoad(driver=driver,searchLimit=searchLimit,search_param=search_param)
159
  if search_type == -1:
 
161
 
162
  # The main image searcher
163
  for page in range(searchLimit["pagecount"]):
164
+
165
+ temp_img_len = len(image_locations)
166
+ WebDriverWait(driver, timeout=9).until(
167
+ EC.presence_of_element_located(
168
+ (By.XPATH, search_param["li_search"] + "//a")))
169
  images = search_image_type(search_type, driver, search_param=search_param)
170
 
171
  for image in images:
172
+ time.sleep(2)
173
  if len(image_locations) - prev_search >= searchLimit["imagecount"]*searchLimit["pagecount"] or len(image_locations) - temp_img_len >= searchLimit["imagecount"]:
174
  break
175
+ image = image.find_element(By.XPATH, "." + "/" + "/a")
 
176
  imageLink = image.find_elements(By.XPATH, ".//img")
177
 
178
  if image.get_attribute("href").rsplit("/", 1)[-1] not in image_names:
179
+ if ai_mode == 1 and process_ai_mode(imageLink, image, driver, exec_path):
180
+ continue
181
 
182
  try:
183
  if sum(filters.values()) == 0 and len(imageLink): # Dl the image directly from the grid
184
+ thumbnailDownloader(imageLink=imageLink, image=image, driver=driver, exec_path=exec_path)
185
 
186
  else: # Dl the image from the image page (opens a new tab)
187
  driver, tempImg = tab_handler(driver=driver, image=image)
188
+ WebDriverWait(driver, timeout=11).until(EC.presence_of_element_located((By.XPATH, "//div[@role='presentation']")))
189
  tempDL = driver.find_element(By.XPATH, "//div[@role='presentation']//img")
190
 
191
  imagePopularity = parseImageData(filters=filters,
 
195
  if filterOptions(filters, imagePopularity=imagePopularity): # Check if image filters are satisfied
196
  tempDLLink = tempDL.get_attribute("src")
197
 
198
+ # Dl the original rez image
199
  if ultimatium:
200
  tempDLLink = tempDLLink.replace("img-master", "img-original"
201
  ).replace("_master1200", "")
202
 
203
+ download_image(imageLink=tempDLLink, exec_path=exec_path, driver=driver)
204
  else:
205
  print("\nImage filters not satisfied...")
206
  driver = tab_handler(driver=driver)
207
  time.sleep(0.3)
208
 
209
  # In case of stale element or any other errors
210
+ except:
211
  if driver.window_handles[-1] != driver.window_handles[0]:
212
+ print("\nI ran into an error, moving on...")
213
  driver = tab_handler(driver=driver)
214
  time.sleep(randint(1, 3) + randint(0, 9) / 10)
215
  continue
 
217
  else:
218
  print("\nImage already exists, moving to another image...")
219
  save_Search(driver, mode=0)
220
+ if not valid_page(driver):
221
  break
222
 
223
 
 
227
  login_btn = driver.find_elements(By.XPATH, "//*[@class='sc-oh3a2p-4 gHKmNu']//a")[1]
228
  login_btn.click()
229
 
230
+ WebDriverWait(driver, timeout=11).until(
231
+ EC.presence_of_element_located((By.XPATH, "//*[@class='sc-2o1uwj-0 elngKN']"))
232
+ )
233
  user_btn = driver.find_element(
234
  By.XPATH, "//*[@class='sc-2o1uwj-0 elngKN']"
235
  ).find_elements(By.TAG_NAME, "fieldset")
 
241
  actions.click(user_btn[1]).send_keys(pass_word).perform()
242
 
243
  # Log in button
244
+ driver.find_element(By.XPATH,"//button[contains(text(), 'Log In')]").click()
245
 
246
  return True
247
 
248
 
249
def download_image(imageLink, exec_path, driver, mode=1):
    """Download ``imageLink`` into ``exec_path.folder_path`` and return the saved path.

    A urllib opener is installed first (see ``installUrlOpeners``), then the
    image is fetched with ``requestUrlretrieve``. If that raises, the URL's
    extension is swapped for ``.png`` and the fetch is tried once more; a
    second failure propagates to the caller.

    Args:
        imageLink: URL of the image to fetch.
        exec_path: object exposing ``folder_path`` (destination directory).
        driver: Selenium driver; its current URL is used to build the opener
            when the download is a thumbnail or ``ultimatium`` is off.
        mode: 0 for a thumbnail download; when truthy the download is also
            recorded in the module-level ``image_locations`` /
            ``image_names`` lists.

    Returns:
        The local path the image was written to (previously ``None`` was
        returned when ``mode`` was truthy).
    """
    tempDLName = imageLink.rsplit("/", 1)[-1]
    img_loc = f"./{exec_path.folder_path}/{tempDLName}"

    if not ultimatium or not mode:
        installUrlOpeners(driver=driver, mode=0)
    else:
        # Here the image URL itself is passed in place of the driver.
        installUrlOpeners(imageLink)

    try:
        requestUrlretrieve(imageLink=imageLink, img_loc=img_loc)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit are
        # not swallowed and turned into a second download attempt.
        imageLink = imageLink.rsplit(".", 1)[0] + ".png"
        requestUrlretrieve(imageLink, img_loc=img_loc)

    print(f"\n{imageLink}")
    if mode:
        # The recorded name keeps the original extension, matching img_loc.
        image_locations.append(img_loc)
        image_names.append(tempDLName.split(".")[0])
    return img_loc
 
268
 
269
 
270
def thumbnailDownloader(imageLink, image, driver, exec_path, mode=1):
    """Hover the grid thumbnail, then download it via ``download_image``.

    ``imageLink`` is first passed through ``image_type`` (defined elsewhere in
    this module) together with ``mode``; the result is what gets downloaded.

    Returns:
        Whatever ``download_image`` returns for the resolved link.
    """
    imageLink = image_type(imageLink=imageLink, mode=mode)

    # Move the pointer over the thumbnail's <img> before downloading.
    hover = ActionChains(driver=driver)
    thumbnail = image.find_element(By.XPATH, ".//img")
    hover.move_to_element(thumbnail).perform()

    saved_path = download_image(
        imageLink=imageLink, exec_path=exec_path, driver=driver, mode=mode
    )
    return saved_path
277
 
278
 
279
+ ######## URLLIB LIBRARY ########
 
 
 
 
 
 
 
 
 
 
 
280
def installUrlOpeners(driver, mode=1):  # Mode 0 means its a thumbnail
    """Install the opener built by ``create_url_headers`` as urllib's global opener.

    When the module-level ``ultimatium`` flag and ``mode`` are both truthy,
    ``driver`` itself is handed to ``create_url_headers``; otherwise
    ``driver.current_url`` is used instead.
    """
    header_source = driver if (ultimatium and mode) else driver.current_url
    urllib.request.install_opener(create_url_headers(header_source))
285
+
286
def requestUrlretrieve(imageLink, img_loc):
    """Download ``imageLink`` to the local path ``img_loc`` using urllib.

    Uses whichever opener is currently installed through
    ``urllib.request.install_opener``. Returns ``None``.
    """
    urllib.request.urlretrieve(url=imageLink, filename=img_loc)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
 
290
  ######## HELPER FUNCTIONS (UNLIKELY TO CHANGE) ########
 
313
  # Waits on the page to load (for popular or freemium)
314
  if searchLimit["imagecount"] == 99:
315
  try:
316
+ WebDriverWait(driver, timeout=12).until(
317
+ EC.presence_of_element_located(
318
+ (By.XPATH, search_param["premium_search"])
319
+ )
320
+ )
321
  print("Premium section found, searching for images...")
322
  except:
323
  print("No popular section")
 
325
  return search_type
326
  else:
327
  try:
328
+ WebDriverWait(driver, timeout=12).until(
329
+ EC.presence_of_element_located((By.XPATH, search_param["li_search"]))
330
+ )
331
  print("\nFreemium section found, searching for images...")
332
  except:
333
  driver.refresh()
 
352
  return parsedData
353
 
354
 
355
def valid_page(driver):
    """Navigate to the next results page if there is one.

    Looks up the element with class ``sc-xhhh7v-0 kYtoqc`` (presumably the
    pagination bar - confirm against the live page), takes the ``href`` of
    its last anchor and loads it.

    Returns:
        1 if the driver was sent to a new page; 0 if the last anchor has no
        href, points at the current URL, or anything raised along the way.
    """
    cur_url = driver.current_url
    try:
        pager = driver.find_element(By.XPATH, '//*[@class="sc-xhhh7v-0 kYtoqc"]')
        next_page = pager.find_elements(By.XPATH, ".//a")[-1].get_attribute("href")

        # No link, or a link back to this same page: nothing further to load.
        # (Returning 0 explicitly instead of falling through to None.)
        if not next_page or next_page == cur_url:
            return 0

        driver.get(next_page)
        return 1
    except Exception:
        # Missing pager, no anchors (IndexError) or a navigation error all
        # count as "no next page". Narrowed from a bare except so
        # KeyboardInterrupt / SystemExit still propagate.
        return 0
370
+
371
+
372
  def date_handler(sel_date):
373
  temp = sel_date.split("-")
374
  try:
 
378
  return 1
379
 
380
 
381
+ def process_ai_mode(imageLink, image, driver, exec_path):
382
  try:
383
  # Dl the image thumbnail from the grid
384
+ img_loc = thumbnailDownloader(imageLink=imageLink, image=image, driver=driver, exec_path=exec_path, mode=0)
385
 
386
  if img_classifier(img_loc):
387
  print("AI Mode: I approve this image")
 
391
  return True
392
  os.remove(img_loc)
393
  except:
 
394
  return True