Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
def scrape_chap_appearances(df, start_chap=1, end_chap=5000, continue_last=True):
    """Scrape per-chapter character appearances from the One Piece fandom wiki.

    For every chapter number in ``range(start_chap, end_chap)`` the chapter
    page is downloaded and each ``<li>`` found inside its ``CharTable`` table
    becomes one row with the keys ``'Chapter'`` and ``'Character'``.

    Args:
        df: Existing appearances frame; scraped rows are added after its rows.
            When it is non-empty and ``continue_last`` is true it must have a
            ``'Chapter'`` column.
        start_chap: First chapter number to fetch (inclusive).
        end_chap: Chapter number to stop at (exclusive).
        continue_last: If true, chapters already listed in ``df['Chapter']``
            are not fetched again.

    Returns:
        ``df`` itself when nothing new was scraped; otherwise a new DataFrame
        holding the rows of ``df`` followed by the scraped rows, re-indexed
        from 0.
    """
    # A set gives O(1) "already scraped?" checks inside the chapter loop.
    if continue_last and not df.empty:
        known_chapters = set(df['Chapter'].tolist())
    else:
        known_chapters = set()

    new_rows = []
    for chapter in range(start_chap, end_chap):
        if chapter in known_chapters:
            continue
        if chapter % 100 == 0:
            print(chapter)  # coarse progress indicator

        URL = f'https://onepiece.fandom.com/wiki/Chapter_{chapter}'
        page = requests.get(URL)
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find('table', class_='CharTable')
        if table is None:
            # No character table on this page (e.g. the chapter page does not
            # exist); skip it instead of failing on ``None.find_all``.
            continue

        new_rows.extend(
            {'Chapter': chapter, 'Character': elem.text}
            for elem in table.find_all('li')
        )

    if not new_rows:
        return df

    # ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
    # frame on every call; build the new rows once and concatenate once.
    scraped = pd.DataFrame(new_rows, columns=['Chapter', 'Character'])
    return pd.concat([df, scraped], ignore_index=True)
| # appearance_dict[i] = char_list | |
| # if __name__ == '__main__': | |
| # df = pd.read_csv("data/onedash_chap_appearance.csv") | |
| # newdf = scrape_chap_appearances(df = df, end_chap = 1006) | |
| # newdf.to_csv("data/onedash_chap_appearance.csv", index=False) |