Yoon-gu Hwang committed on
Commit ·
59b8c1b
1
Parent(s): 7a35858
포켓몬 진화 데이터 추가
Browse files- make_evolve_dataset.py +43 -0
- pokemon_evolve.json +0 -0
make_evolve_dataset.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import urllib.request
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
import urllib.parse
|
| 5 |
+
from urllib.parse import urlsplit, quote
|
| 6 |
+
from urllib.request import Request, urlopen
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
import pandas as pd
|
| 10 |
+
|
| 11 |
+
# Crawl the Korean Pokémon fandom wiki page by page, starting from the page
# below and following each page's "next" link, and dump every monster's
# evolution chain to pokemon_evolve.json.
url = 'https://pokemon.fandom.com/ko/wiki/이상해씨_(포켓몬)'

# The path contains non-ASCII characters, so percent-encode it before handing
# the URL to urllib.
url_info = urlsplit(url)
encoded_url = f'{url_info.scheme}://{url_info.netloc}{quote(url_info.path)}'

info = []   # one {"name": ..., "evolve": [...]} dict per crawled page
erros = []  # names of pages on which no evolution table was found
target_number = 1017
cnt = 0

# Loop invariants hoisted out of the crawl loop.
request_headers = {'User-Agent': 'Mozilla/5.0'}
evol_table_style = re.compile("^margin:auto; text-align:center;")

# NOTE: the loop stops after at most `target_number` pages (see the `cnt`
# check below); the `+2` only affects the total shown by the progress bar.
for _ in tqdm(range(target_number+2)):
    cnt += 1
    req = Request(encoded_url, headers=request_headers)
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as res:
        html = res.read()
    soup = BeautifulSoup(html, 'html.parser')

    name = soup.find("div", {"class": "name-ko"}).text.strip()
    number = soup.find("div", {"class": "index"}).text.strip()

    # The evolution table is located by its inline style. find() returns None
    # when nothing matches; record the page and keep crawling rather than
    # aborting the whole run with an AttributeError.
    evol_tables = soup.find("table", style=evol_table_style)
    if evol_tables is None:
        erros.append(name)
        evolve = []
    else:
        evolve = [e.span.text for e in evol_tables.find_all("table")]
    info.append(dict(name=name, evolve=evolve))

    # The last link inside the first table of the page is used as the link to
    # the next monster's page.
    next_monster = soup.find("table").find_all("a")[-1]['href']
    encoded_url = "https://pokemon.fandom.com" + next_monster

    # Stop once the page with the target index number has been processed.
    if number == f"No.{target_number:04d}":
        break

    # Safety net: never fetch more than `target_number` pages.
    if cnt >= target_number:
        break

if erros:
    print(f"No evolution table found on {len(erros)} page(s): {erros}")

# ensure_ascii=False writes the Korean text as-is, so the file encoding must
# be pinned to UTF-8 instead of depending on the platform's locale default.
with open('pokemon_evolve.json', 'w', encoding='utf-8') as f:
    json.dump(info, f, ensure_ascii=False, indent=4)
|
pokemon_evolve.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|