Spaces:

tappyness1
/

one_dash

Sleeping

one_dash / src /scraper_chap_appearance.py

tappyness1

initial commit

cb22296 over 2 years ago

1.2 kB

	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	def scrape_chap_appearances(df, start_chap = 1, end_chap =5000, continue_last = True):
	if df.empty == True:
	curr_chapts = []
	else:
	if continue_last:
	curr_chapts = df['Chapter'].tolist()
	else: curr_chapts = []
	for i in range(start_chap, end_chap):
	if i in curr_chapts:
	continue
	else:
	if i % 100 == 0:
	print (i)
	# char_list = []
	URL = f'https://onepiece.fandom.com/wiki/Chapter_{i}'
	page = requests.get(URL)

	soup = BeautifulSoup(page.content, 'html.parser')

	table = soup.find('table', class_='CharTable')

	for elem in table.find_all('li'):
	# char_list.append(elem.text)
	df = df.append({'Chapter': int(i), 'Character': elem.text}, ignore_index=True)
	return df
	# appearance_dict[i] = char_list

	# if __name__ == '__main__':
	# df = pd.read_csv("data/onedash_chap_appearance.csv")
	# newdf = scrape_chap_appearances(df = df, end_chap = 1006)
	# newdf.to_csv("data/onedash_chap_appearance.csv", index=False)