Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import sqlite3 | |
| from sqlite3 import Error | |
| def create_connection(db_file): | |
| """ create a database connection to a SQLite database """ | |
| conn = None | |
| conn = sqlite3.connect(db_file) | |
| if conn: | |
| conn.close() | |
| def scrape_char_details(char_link_df, save_file_name): | |
| char_links = char_link_df['Link'].tolist() | |
| df = pd.DataFrame() | |
| for char_link in char_links: | |
| try: | |
| URL = f'https://onepiece.fandom.com{char_link}' | |
| page = requests.get(URL) | |
| soup = BeautifulSoup(page.content, 'html.parser') | |
| table = soup.find('aside', {'role': 'region'} ) | |
| name = table.find("h2", {"data-source": "name"}).text | |
| char_det_dict = {"Name": name} | |
| det_list = ['first','affiliation', 'occupation','residence', 'epithet','status', 'age', 'bounty', 'dfname'] | |
| for det in det_list: | |
| if table.find("div", {"data-source": det}) is not None: | |
| text_value = table.find("div", {"data-source": det}).find("div", {"class": "pi-data-value pi-font"}).text | |
| if text_value is not None: | |
| char_det_dict[det] = text_value | |
| else: | |
| char_det_dict[det] = [i.get("title") for i in table.find("div", {"data-source": det}).find("div").find_all("a")] | |
| df = df.append(char_det_dict, ignore_index=True) | |
| except: | |
| print(f'Unable to process: {char_link}') | |
| continue | |
| df.to_csv(save_file_name, index=False) | |
| # print (char_det_dict) | |
| if __name__ == '__main__': | |
| # dbname = r"data/OPdash.db" | |
| # create_connection(dbname) | |
| char_link_df = pd.read_csv('data/char_link.csv') | |
| scrape_char_details(char_link_df, save_file_name = "data/char_details.csv") | |