Spaces:
Sleeping
Sleeping
import os
import re

import requests
from bs4 import BeautifulSoup
def scrape_page(url, timeout=10):
    """Fetch a web page and return its text content as a single line.

    The response body is parsed with BeautifulSoup's ``html.parser``; the
    extracted text has leading/trailing whitespace stripped and every
    newline character removed. Non-empty text is also printed to stdout.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The extracted text, or ``None`` when the page yields no text or the
        request fails.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report and signal "no text" so one bad page does not abort a crawl.
        print("Failed to fetch {}: {}".format(url, exc))
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text().strip().replace("\n", "")
    if not text:
        return None

    print(text)
    return text
def scrape_recursive(url, output_file):
    """Scrape ``url`` and save its text to ``output_file`` encoded as UTF-8.

    Despite the name, only the given page is fetched; no links are followed.
    Nothing is written when ``scrape_page`` returns no text.

    Args:
        url: Address of the page to scrape.
        output_file: Path of the text file to create or overwrite.
    """
    text = scrape_page(url)
    if not text:
        return

    # open() does not create missing directories, so make sure the target
    # folder exists before writing.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(text)
# Base URL; the page number is appended as the value of the ``no`` parameter.
url = "https://www.jbnu.ac.kr/eng/?menuID=350&mode=view&no="

# Scrape page numbers 1 through 319, writing each one to its own text file.
for k in range(1, 320):
    page_url = url + str(k)
    output_path = "data/output{}.txt".format(k)
    scrape_recursive(page_url, output_path)