Create Other Builds/Counter/1a1a.py

d42b500 verified about 2 years ago

6.84 kB

	import requests
	from bs4 import BeautifulSoup
	import re
	import time
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.chrome.service import Service

	url = "https://www.deviantart.com/amber2024/gallery"




	def get_values(url):
	response = requests.get(url)
	soup = BeautifulSoup(response.content, 'html.parser')
	spans = soup.findAll('span', class_="_3AClx")
	favs = 0
	comments = 0
	views = 0
	#print(spans)
	# Iterate over each span in the list
	c = 0
	for span in spans:
	# Extract the numeric value and the unit (Favourites, Comment, Views)
	print('\n'+str(list(span))+str(c)+'\n')
	value = str(list(span)[0]).strip('</span>')
	unit = str(list(span)[2]).lstrip('abcdeghijklmnop qrstuvwxyz_1234567890N"=></').rstrip('/span>')
	#print(value)
	# Convert value to numeric format
	if 'K' in value:
	value = float(value[:-1]) * 1000
	else:
	#print(str(len(value))+'val'+value)
	value = int(value)
	print(unit)

	# Check the unit and update the corresponding variable
	if unit == 'Favourites<' or unit == 'Favourite':
	favs = value
	elif unit == 'Comments<' or unit == 'Comment<':
	comments = value
	elif unit == 'Views<' or unit == 'View':
	views = value
	c+=1
	#print(favs,comments,views)
	return (favs,comments,views)

	def get_tags(url):
	response = requests.get(url)
	soup = BeautifulSoup(response.content, 'html.parser')
	spans = soup.findAll('span', class_="_1nwad")
	tags = []
	#print(spans)
	# Iterate over each span in the list
	c = 0
	for span in spans:
	tags.append(str(span).split('>')[1].split('<')[0])
	print(tags,spans)
	return tags


	def get_links(url,page=1):
	service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver') # Path to chromedriver executable
	driver = webdriver.Chrome(service=service)
	driver.get(url+'?page='+str(page))

	# Scroll to the bottom of the page
	last_height = driver.execute_script("return document.body.scrollHeight")
	while True:
	driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
	time.sleep(0) # Adjust sleep time as needed
	new_height = driver.execute_script("return document.body.scrollHeight")
	if new_height == last_height:
	break

	last_height = new_height

	soup = BeautifulSoup(driver.page_source, 'html.parser')
	links = []
	for link in soup.find_all('a'):
	links.append(link.get('href'))

	#driver.quit()
	return links



	drawings = []
	names = []

	def recursion(url):
	global get_links, drawings, names
	recur = []
	cecant = get_links(url)#tmp
	secant = False
	cocecant = 1
	cosecant = []

	for i in cecant:
	if '/all' in i and not '/all?' in i:
	secant = True
	recur.append(i)
	if '?page=' in i:
	cosecant.append(int(i.split('?page=')[1]))
	print(cosecant,'cosecant')
	recur = list(set(recur))

	try:
	cocecant = max(cosecant)
	except:
	print('Only One Page')
	print(cocecant,'cocecant')
	if secant != True:
	for i in cecant:
	if "/gallery/" in i:
	recur.append(i)

	print(recur,'reccc')
	for j in recur:
	cecant = get_links(j)#tmp
	secant = False
	cocecant = 1
	cosecant = []

	for i in cecant:
	if '/all' in i and not '/all?' in i:
	secant = True
	recur.append(i)
	if '?page=' in i:
	cosecant.append(int(i.split('?page=')[1]))
	recur = list(set(recur))
	print(recur)
	print(cosecant,'cosc')
	try:
	cocecant = max(cosecant)
	except:
	print('Only One Page')

	for z in range(1,cocecant+1):
	print(z)
	x = get_links(j,page=z)

	flag = False
	alled = False #If there is a folder for All Deviations
	for k in x:
	if '/art' in k:
	flag = True
	break

	if flag == True:
	print(x,'xxxxxxxxx')
	for c in x:
	if "/art/" in c and not "#comments" in c and not c in drawings:
	drawings.append(c)
	names.append(c.split('/art/')[1])
	else:
	break

	drawings = list(set(drawings))

	#print(get_links(url))
	recursion(url)
	#print(drawings)

	finalle = []
	names = []
	def recur_works():
	global finalle
	for i in drawings:
	finalle.append(get_values(i))

	import threading

	drawings = list(set(drawings))
	tag_sets = []
	# Function to process one item from the drawings list
	def process_item(item):
	global tag_sets
	finalle.append(get_values(item))
	names.append(item.split('/art/')[1])
	tag_sets.append(get_tags(item))

	# Divide the drawings into chunks for each thread
	num_threads = 1
	chunk_size = len(drawings) // num_threads if len(drawings) % num_threads == 0 else len(drawings) // num_threads + 1
	chunks = [drawings[i:i+chunk_size] for i in range(0, len(drawings), chunk_size)]

	# Create and start worker threads
	threads = []
	for chunk in chunks:
	for drawing in chunk:
	# Create a new thread for each item (or group them per chunk as needed)
	t = threading.Thread(target=process_item, args=(drawing,))
	threads.append(t)
	t.start()

	# Wait for all threads to complete
	for t in threads:
	t.join()


	def get_summation():
	print(finalle)
	favs = 0
	comm = 0
	view = 0
	for i in finalle:
	if i!=False:
	favs += i[0]
	comm += i[1]
	view += i[2]
	print('favs:',favs,'comm:',comm,'view:',view, 'names:', names)

	def get_tag_summation():
	post_processed_tags = []
	indexx = []
	for c in range(len(tag_sets)):
	i = tag_sets[c]
	for j in i:
	if j in indexx:
	post_processed_tags[indexx.index(j)][1] = list(post_processed_tags[indexx.index(j)][1])
	post_processed_tags[indexx.index(j)][2] += 1
	post_processed_tags[indexx.index(j)][1][0] += finalle[c][0]
	post_processed_tags[indexx.index(j)][1][1] += finalle[c][1]
	post_processed_tags[indexx.index(j)][1][2] += finalle[c][2]

	else:
	post_processed_tags.append([j, finalle[c], 1])
	indexx.append(j)
	return post_processed_tags

	#recur_works()
	get_summation()
	e = get_tag_summation()

	print(e)